VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@105904

Last change on this file since 105904 was 104840, checked in by vboxsync, 6 months ago

VMM/PGM: Refactored RAM ranges, MMIO2 ranges and ROM ranges and added MMIO ranges (to PGM) so we can safely access RAM ranges at runtime w/o fear of them ever being freed up. It is now only possible to create these during VM creation and loading, and they will live till VM destruction (except for MMIO2 which could be destroyed during loading (PCNet fun)). The lookup handling is by table instead of pointer tree. No more ring-0 pointers in shared data. bugref:10687 bugref:10093

1/* $Id: PGMAllBth.h 104840 2024-06-05 00:59:51Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
12
13/*
14 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
15 *
16 * This file is part of VirtualBox base platform packages, as
17 * available from https://www.virtualbox.org.
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation, in version 3 of the
22 * License.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, see <https://www.gnu.org/licenses>.
31 *
32 * SPDX-License-Identifier: GPL-3.0-only
33 */
34
35#ifdef _MSC_VER
36/** @todo we're generating unnecessary code in nested/ept shadow mode and for
37 * real/prot-guest+RC mode. */
38# pragma warning(disable: 4505)
39#endif
40
41
42/*********************************************************************************************************************************
43* Internal Functions *
44*********************************************************************************************************************************/
45RT_C_DECLS_BEGIN
46PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
47#ifndef IN_RING3
48PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx, RTGCPTR pvFault, bool *pfLockTaken);
49PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx, RTGCPHYS GCPhysNestedFault,
50 bool fIsLinearAddrValid, RTGCPTR GCPtrNestedFault, PPGMPTWALK pWalk, bool *pfLockTaken);
51# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) && PGM_SHW_TYPE == PGM_TYPE_EPT
52static void PGM_BTH_NAME(NestedSyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPte, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage,
53 unsigned iPte, SLATPTE GstSlatPte);
54static int PGM_BTH_NAME(NestedSyncPage)(PVMCPUCC pVCpu, RTGCPHYS GCPhysNestedPage, RTGCPHYS GCPhysPage, unsigned cPages,
55 uint32_t uErr, PPGMPTWALKGST pGstWalkAll);
56static int PGM_BTH_NAME(NestedSyncPT)(PVMCPUCC pVCpu, RTGCPHYS GCPhysNestedPage, RTGCPHYS GCPhysPage, PPGMPTWALKGST pGstWalkAll);
57# endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
58#endif
59PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
60static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
61static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
62static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
63#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
64static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
65#else
66static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
67#endif
68PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
69PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
70PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
71#ifdef VBOX_STRICT
72PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
73#endif
74PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
75PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
76
77#ifdef IN_RING3
78PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
79#endif
80RT_C_DECLS_END
81
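/*
 * Note: Each prototype above is instantiated once per shadow/guest paging combination;
 * PGM_BTH_NAME() mangles the mode pair and ring context into the symbol name (see
 * PGMInternal.h). The resulting functions are reached through PGM's mode data tables
 * (set up in PGMAll.cpp) rather than being called directly.
 */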
82
83
84
85/*
86 * Filter out some illegal combinations of guest and shadow paging, so we can
87 * remove redundant checks inside functions.
88 */
89#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
90 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
91# error "Invalid combination; PAE guest implies PAE shadow"
92#endif
93
94#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
95 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
96 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
97# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
98#endif
99
100#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
101 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
102 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
103# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
104#endif
105
106#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
107 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
108# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
109#endif
110
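/*
 * For reference, the checks above leave these shadow paging choices per guest mode
 * (every guest mode additionally permits the nested, EPT and NONE shadow types):
 *      real mode guest        -> 32-bit or PAE shadow
 *      protected mode guest   -> 32-bit, PAE or AMD64 shadow
 *      32-bit guest           -> 32-bit or PAE shadow
 *      PAE guest              -> PAE shadow
 *      AMD64 guest            -> AMD64 shadow
 */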
111
112/**
113 * Enters the shadow+guest mode.
114 *
115 * @returns VBox status code.
116 * @param pVCpu The cross context virtual CPU structure.
117 * @param GCPhysCR3 The physical address from the CR3 register.
118 */
119PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
120{
121 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
122 * other modes rely on MapCR3/UnmapCR3 to set up the shadow root page tables.
123 */
124#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
125 || PGM_SHW_TYPE == PGM_TYPE_PAE \
126 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
127 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
128 || PGM_GST_TYPE == PGM_TYPE_PROT))
129
130 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
131
132 Assert(!pVM->pgm.s.fNestedPaging);
133
134 PGM_LOCK_VOID(pVM);
135 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
136 * but any calls to GC need a proper shadow page setup as well.
137 */
138 /* Free the previous root mapping if still active. */
139 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
140 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
141 if (pOldShwPageCR3)
142 {
143 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
144
145 /* Mark the page as unlocked; allow flushing again. */
146 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
147
148 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
149 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
150 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
151 }
152
153 /* Construct a fake address; real and protected mode have no guest CR3, so the pool entry is keyed on a synthetic address instead. */
154 GCPhysCR3 = RT_BIT_64(63);
155 PPGMPOOLPAGE pNewShwPageCR3;
156 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
157 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
158 &pNewShwPageCR3);
159 AssertRCReturn(rc, rc);
160
161 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
162 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
163
164 /* Mark the page as locked; disallow flushing. */
165 pgmPoolLockPage(pPool, pNewShwPageCR3);
166
167 /* Set the current hypervisor CR3. */
168 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
169
170 PGM_UNLOCK(pVM);
171 return rc;
172#else
173 NOREF(pVCpu); NOREF(GCPhysCR3);
174 return VINF_SUCCESS;
175#endif
176}
177
178
179#ifndef IN_RING3
180
181# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
182/**
183 * Deal with a guest page fault.
184 *
185 * @returns Strict VBox status code.
186 * @retval VINF_EM_RAW_GUEST_TRAP
187 * @retval VINF_EM_RAW_EMULATE_INSTR
188 *
189 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
190 * @param pWalk The guest page table walk result.
191 * @param uErr The error code.
192 */
193PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
194{
195 /*
196 * Calc the error code for the guest trap.
197 */
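 /* For reference: the X86_TRAP_PF_* bits follow the architectural #PF error code
    layout - P (bit 0) means protection violation (clear means not-present), RW (bit 1)
    write access, US (bit 2) user-mode access, RSVD (bit 3) reserved bit set in the
    paging structures, ID (bit 4) instruction fetch. */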
198 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
199 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
200 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
201 if ( pWalk->fRsvdError
202 || pWalk->fBadPhysAddr)
203 {
204 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
205 Assert(!pWalk->fNotPresent);
206 }
207 else if (!pWalk->fNotPresent)
208 uNewErr |= X86_TRAP_PF_P;
209 TRPMSetErrorCode(pVCpu, uNewErr);
210
211 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
212 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
213 return VINF_EM_RAW_GUEST_TRAP;
214}
215# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
216
217
218#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
219/**
220 * Deal with a guest page fault on a page with a physical access handler.
221 *
222 * The caller has taken the PGM lock.
223 *
224 * @returns Strict VBox status code.
225 *
226 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
227 * @param uErr The error code.
228 * @param pCtx Pointer to the register context for the CPU.
229 * @param pvFault The fault address.
230 * @param pPage The guest page at @a pvFault.
231 * @param pWalk The guest page table walk result.
232 * @param pGstWalk The guest paging-mode specific walk information.
233 * @param pfLockTaken PGM lock taken here or not (out). This is true
234 * when we're called.
235 */
236static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx,
237 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
238# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
239 , PPGMPTWALK pWalk
240 , PGSTPTWALK pGstWalk
241# endif
242 )
243{
244# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
245 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
246# endif
247 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
248 VBOXSTRICTRC rcStrict;
249
250 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
251 {
252 /*
253 * Physical page access handler.
254 */
255# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
256 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
257# else
258 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
259# endif
260 PPGMPHYSHANDLER pCur;
261 rcStrict = pgmHandlerPhysicalLookup(pVM, GCPhysFault, &pCur);
262 if (RT_SUCCESS(rcStrict))
263 {
264 PCPGMPHYSHANDLERTYPEINT const pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
265
266# ifdef PGM_SYNC_N_PAGES
267 /*
268 * If the region is write protected and we got a page not present fault, then sync
269 * the pages. If the fault was caused by a read, then restart the instruction.
270 * In case of write access continue to the GC write handler.
271 *
272 * ASSUMES that there is only one handler per page or that they have similar write properties.
273 */
274 if ( !(uErr & X86_TRAP_PF_P)
275 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
276 {
277# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
278 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
279# else
280 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
281# endif
282 if ( RT_FAILURE(rcStrict)
283 || !(uErr & X86_TRAP_PF_RW)
284 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
285 {
286 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
287 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
288 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
289 return rcStrict;
290 }
291 }
292# endif
293# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
294 /*
295 * If the access was not thru a #PF(RSVD|...) resync the page.
296 */
297 if ( !(uErr & X86_TRAP_PF_RSVD)
298 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
299# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
300 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
301 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
302# endif
303 )
304 {
305# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
306 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
307# else
308 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
309# endif
310 if ( RT_FAILURE(rcStrict)
311 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
312 {
313 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
314 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
315 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
316 return rcStrict;
317 }
318 }
319# endif
320
321 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
322 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
323 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
324 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
325 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
326 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
327 else
328 {
329 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
330 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
331 }
332
333 if (pCurType->pfnPfHandler)
334 {
335 STAM_PROFILE_START(&pCur->Stat, h);
336
337 if (pCurType->fKeepPgmLock)
338 {
339 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pCtx, pvFault, GCPhysFault,
340 !pCurType->fRing0DevInsIdx ? pCur->uUser
341 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser));
342
343 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
344 }
345 else
346 {
347 uint64_t const uUser = !pCurType->fRing0DevInsIdx ? pCur->uUser
348 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser);
349 PGM_UNLOCK(pVM);
350 *pfLockTaken = false;
351
352 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pCtx, pvFault, GCPhysFault, uUser);
353
354 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
355 }
356 }
357 else
358 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
359
360 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
361 return rcStrict;
362 }
363 AssertMsgReturn(rcStrict == VERR_NOT_FOUND, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict);
364 }
365
366 /*
367 * There is a handled area of the page, but this fault doesn't belong to it.
368 * We must emulate the instruction.
369 *
370 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
371 * we first check if this was a page-not-present fault for a page with only
372 * write access handlers. Restart the instruction if it wasn't a write access.
373 */
374 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
375
376 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
377 && !(uErr & X86_TRAP_PF_P))
378 {
379# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
380 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
381# else
382 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
383# endif
384 if ( RT_FAILURE(rcStrict)
385 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
386 || !(uErr & X86_TRAP_PF_RW))
387 {
388 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
389 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
390 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
391 return rcStrict;
392 }
393 }
394
395 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
396 * It's writing to an unhandled part of the LDT page several million times.
397 */
398 rcStrict = PGMInterpretInstruction(pVCpu, pvFault);
399 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
400 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
401 return rcStrict;
402} /* if any kind of handler */
403# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
404
405
406/**
407 * \#PF Handler for raw-mode guest execution.
408 *
409 * @returns VBox status code (appropriate for trap handling and GC return).
410 *
411 * @param pVCpu The cross context virtual CPU structure.
412 * @param uErr The trap error code.
413 * @param pCtx Pointer to the register context for the CPU.
414 * @param pvFault The fault address.
415 * @param pfLockTaken PGM lock taken here or not (out)
416 */
417PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx, RTGCPTR pvFault, bool *pfLockTaken)
418{
419 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
420
421 *pfLockTaken = false;
422
423# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
424 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
425 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
426 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
427 && PGM_SHW_TYPE != PGM_TYPE_NONE
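 /* The body below is thus only compiled for the plain shadow paging combinations
    (32-bit, PAE or AMD64 shadow) and for EPT shadowing of a PROT guest (nested
    paging); every other combination takes the #else at the bottom of the function
    and returns VERR_PGM_NOT_USED_IN_MODE. */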
428 int rc;
429
430# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
431 /*
432 * Walk the guest page translation tables and check if it's a guest fault.
433 */
434 PGMPTWALK Walk;
435 GSTPTWALK GstWalk;
436 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
437 if (RT_FAILURE_NP(rc))
438 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
439
440 /* assert some GstWalk sanity. */
441# if PGM_GST_TYPE == PGM_TYPE_AMD64
442 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
443# endif
444# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
445 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
446# endif
447 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
448 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
449 Assert(Walk.fSucceeded);
450 Assert(Walk.fEffective & PGM_PTATTRS_R_MASK);
451
452 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
453 {
454 if ( ( (uErr & X86_TRAP_PF_RW)
455 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
456 && ( (uErr & X86_TRAP_PF_US)
457 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
458 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
459 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
460 )
461 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
462 }
463
464 /* Take the big lock now before we update flags. */
465 *pfLockTaken = true;
466 PGM_LOCK_VOID(pVM);
467
468 /*
469 * Set the accessed and dirty flags.
470 */
471 /** @todo Should probably use cmpxchg logic here as we're potentially racing
472 * other CPUs in SMP configs. (the lock isn't enough, since we take it
473 * after walking and the page tables could be stale already) */
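 /* A compare-exchange approach would re-read the entry and retry on contention; a
    rough sketch for the 64-bit (PAE/AMD64) entry case, illustration only and not
    built here:
        uint64_t uOld, uNew;
        do
        {
            uOld = ASMAtomicReadU64(&GstWalk.pPde->u);
            uNew = uOld | X86_PDE_A;
        } while (!ASMAtomicCmpXchgU64(&GstWalk.pPde->u, uNew, uOld));
    The code below instead just ORs the A/D bits in atomically via GST_ATOMIC_OR. */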
474# if PGM_GST_TYPE == PGM_TYPE_AMD64
475 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
476 {
477 GstWalk.Pml4e.u |= X86_PML4E_A;
478 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
479 }
480 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
481 {
482 GstWalk.Pdpe.u |= X86_PDPE_A;
483 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
484 }
485# endif
486 if (Walk.fBigPage)
487 {
488 Assert(GstWalk.Pde.u & X86_PDE_PS);
489 if (uErr & X86_TRAP_PF_RW)
490 {
491 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
492 {
493 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
494 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
495 }
496 }
497 else
498 {
499 if (!(GstWalk.Pde.u & X86_PDE4M_A))
500 {
501 GstWalk.Pde.u |= X86_PDE4M_A;
502 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
503 }
504 }
505 }
506 else
507 {
508 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
509 if (!(GstWalk.Pde.u & X86_PDE_A))
510 {
511 GstWalk.Pde.u |= X86_PDE_A;
512 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
513 }
514
515 if (uErr & X86_TRAP_PF_RW)
516 {
517# ifdef VBOX_WITH_STATISTICS
518 if (GstWalk.Pte.u & X86_PTE_D)
519 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
520 else
521 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
522# endif
523 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
524 {
525 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
526 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
527 }
528 }
529 else
530 {
531 if (!(GstWalk.Pte.u & X86_PTE_A))
532 {
533 GstWalk.Pte.u |= X86_PTE_A;
534 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
535 }
536 }
537 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
538 }
539#if 0
540 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
541 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
542 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
543#endif
544
545# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
546 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
547
548 /* Take the big lock now. */
549 *pfLockTaken = true;
550 PGM_LOCK_VOID(pVM);
551# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
552
553# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
554 /*
555 * If it is a reserved bit fault we know that it is an MMIO (access
556 * handler) related fault and can skip some 200 lines of code.
557 */
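 /* (With the MMIO optimizations the shadow PTEs set up for such handler pages
    deliberately contain reserved bits, so these accesses surface with
    X86_TRAP_PF_RSVD and can be classified without walking the guest tables.) */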
558 if (uErr & X86_TRAP_PF_RSVD)
559 {
560 Assert(uErr & X86_TRAP_PF_P);
561 PPGMPAGE pPage;
562# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
563 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
564 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
565 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pCtx, pvFault, pPage,
566 pfLockTaken, &Walk, &GstWalk));
567 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
568# else
569 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
570 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
571 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pCtx, pvFault, pPage, pfLockTaken));
572 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
573# endif
574 AssertRC(rc);
575 PGM_INVL_PG(pVCpu, pvFault);
576 return rc; /* Restart with the corrected entry. */
577 }
578# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
579
580 /*
581 * Fetch the guest PDE, PDPE and PML4E.
582 */
583# if PGM_SHW_TYPE == PGM_TYPE_32BIT
584 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
585 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
586
587# elif PGM_SHW_TYPE == PGM_TYPE_PAE
588 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
589 PX86PDPAE pPDDst;
590# if PGM_GST_TYPE == PGM_TYPE_PAE
591 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
592# else
593 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
594# endif
595 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
596
597# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
598 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
599 PX86PDPAE pPDDst;
600# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
601 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
602 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
603# else
604 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
605# endif
606 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
607
608# elif PGM_SHW_TYPE == PGM_TYPE_EPT
609 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
610 PEPTPD pPDDst;
611 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
612 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
613# endif
614 Assert(pPDDst);
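 /* At this point iPDDst/pPDDst address the shadow page-directory entry covering
    pvFault for whichever shadow paging mode this template was instantiated with. */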
615
616# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
617 /*
618 * Dirty page handling.
619 *
620 * If we successfully correct the write protection fault due to dirty bit
621 * tracking, then return immediately.
622 */
623 if (uErr & X86_TRAP_PF_RW) /* write fault? */
624 {
625 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
626 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
627 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
628 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
629 {
630 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
631 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
632 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
633 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
634 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
635 return VINF_SUCCESS;
636 }
637#ifdef DEBUG_bird
638 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
639 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
640#endif
641 }
642
643# if 0 /* rarely useful; leave for debugging. */
644 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
645# endif
646# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
647
648 /*
649 * A common case is the not-present error caused by lazy page table syncing.
650 *
651 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
652 * here so we can safely assume that the shadow PT is present when calling
653 * SyncPage later.
654 *
655 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
656 * of mapping conflict and defer to SyncCR3 in R3.
657 * (Again, we do NOT support access handlers for non-present guest pages.)
658 *
659 */
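 /* "Lazy syncing" means the shadow page tables are only populated on demand: the
    first access to an address whose shadow PDE has not been instantiated yet arrives
    here as a not-present #PF, and SyncPT below builds the entire shadow page table. */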
660# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
661 Assert(GstWalk.Pde.u & X86_PDE_P);
662# endif
663 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
664 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
665 {
666 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
667# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
668 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
669 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
670# else
671 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
672 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
673# endif
674 if (RT_SUCCESS(rc))
675 return rc;
676 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
677 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
678 return VINF_PGM_SYNC_CR3;
679 }
680
681 /*
682 * Check if this fault address is flagged for special treatment,
683 * which means we'll have to figure out the physical address and
684 * check flags associated with it.
685 *
686 * ASSUME that we can limit any special access handling to pages
687 * in page tables which the guest believes to be present.
688 */
689# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
690 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
691# else
692 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
693# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
694 PPGMPAGE pPage;
695 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
696 if (RT_FAILURE(rc))
697 {
698 /*
699 * When the guest accesses invalid physical memory (e.g. probing
700 * of RAM or accessing a remapped MMIO range), then we'll fall
701 * back to the recompiler to emulate the instruction.
702 */
703 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
704 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
705 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
706 return VINF_EM_RAW_EMULATE_INSTR;
707 }
708
709 /*
710 * Any handlers for this page?
711 */
712 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
713# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
714 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pCtx, pvFault, pPage, pfLockTaken,
715 &Walk, &GstWalk));
716# else
717 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pCtx, pvFault, pPage, pfLockTaken));
718# endif
719
720 /*
721 * We are here only if the page is present in the guest page tables and
722 * the trap is not handled by our handlers.
723 *
724 * Check for a page out-of-sync situation.
725 */
726 if (!(uErr & X86_TRAP_PF_P))
727 {
728 /*
729 * Page is not present in our page tables. Try to sync it!
730 */
731 if (uErr & X86_TRAP_PF_US)
732 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
733 else /* supervisor */
734 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
735
736 if (PGM_PAGE_IS_BALLOONED(pPage))
737 {
738 /* Emulate reads from ballooned pages as they are not present in
739 our shadow page tables. (Required for e.g. Solaris guests; soft
740 ecc, random nr generator.) */
741 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVCpu, pvFault));
742 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
743 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
744 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
745 return rc;
746 }
747
748# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
749 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
750# else
751 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
752# endif
753 if (RT_SUCCESS(rc))
754 {
755 /* The page was successfully synced, return to the guest. */
756 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
757 return VINF_SUCCESS;
758 }
759 }
760 else /* uErr & X86_TRAP_PF_P: */
761 {
762 /*
763 * Write protected pages are made writable when the guest makes the
764 * first write to it. This happens for pages that are shared, write
765 * monitored or not yet allocated.
766 *
767 * We may also end up here when CR0.WP=0 in the guest.
768 *
769 * Also, a side effect of not flushing global PDEs is out-of-sync
770 * pages due to physically monitored regions that are no longer valid.
771 * Assume for now it only applies to the read/write flag.
772 */
773 if (uErr & X86_TRAP_PF_RW)
774 {
775 /*
776 * Check if it is a read-only page.
777 */
778 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
779 {
780 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
781# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
782 Assert(!PGM_PAGE_IS_ZERO(pPage));
783# endif
784 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
785# ifdef PGM_WITH_PAGE_ZEROING_DETECTION
786 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ZERO
787 && (pvFault & X86_PAGE_OFFSET_MASK) == 0
788 && pgmHandlePageZeroingCode(pVCpu, pCtx))
789 {
790 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2PageZeroing; });
791 return VINF_SUCCESS;
792 }
793# endif
794 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
795
796 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
797 if (rc != VINF_SUCCESS)
798 {
799 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
800 return rc;
801 }
802 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
803 return VINF_EM_NO_MEMORY;
804 }
805
806# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
807 /*
808 * Check to see if we need to emulate the instruction if CR0.WP=0.
809 */
810 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
811 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
812 && CPUMGetGuestCPL(pVCpu) < 3)
813 {
814 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
815
816 /*
817 * The Netware WP0+RO+US hack.
818 *
819 * Netware sometimes(/always?) runs with WP0. It has been observed doing
820 * excessive write accesses to pages which are mapped with US=1 and RW=0
821 * while WP=0. This causes a lot of exits and extremely slow execution.
822 * To avoid trapping and emulating every write here, we change the shadow
823 * page table entry to map it as US=0 and RW=1 until user mode tries to
824 * access it again (see further below). We count these shadow page table
825 * changes so we can avoid having to clear the page pool every time the WP
826 * bit changes to 1 (see PGMCr0WpEnabled()).
827 */
828# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
829 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
830 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
831 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
832 {
833 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
834 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
835 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
836 {
837 PGM_INVL_PG(pVCpu, pvFault);
838 pVCpu->pgm.s.cNetwareWp0Hacks++;
839 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
840 return rc;
841 }
842 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
843 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
844 }
845# endif
846
847 /* Interpret the access. */
848 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVCpu, pvFault));
849 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
850 if (RT_SUCCESS(rc))
851 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
852 else
853 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
854 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
855 return rc;
856 }
857# endif
858 /// @todo count the above case; else
859 if (uErr & X86_TRAP_PF_US)
860 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
861 else /* supervisor */
862 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
863
864 /*
865 * Sync the page.
866 *
867 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
868 * page is not present, which is not true in this case.
869 */
870# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
871 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
872# else
873 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
874# endif
875 if (RT_SUCCESS(rc))
876 {
877 /*
878 * Page was successfully synced, return to guest but invalidate
879 * the TLB first as the page is very likely to be in it.
880 */
881# if PGM_SHW_TYPE == PGM_TYPE_EPT
882 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
883# else
884 PGM_INVL_PG(pVCpu, pvFault);
885# endif
886# ifdef VBOX_STRICT
887 PGMPTWALK GstPageWalk;
888 GstPageWalk.GCPhys = RTGCPHYS_MAX;
889 if (!pVM->pgm.s.fNestedPaging)
890 {
891 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
892 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
893 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
894 }
895# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
896 uint64_t fPageShw = 0;
897 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
898 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
899 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
900# endif
901# endif /* VBOX_STRICT */
902 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
903 return VINF_SUCCESS;
904 }
905 }
906# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
907 /*
908 * Check for Netware WP0+RO+US hack from above and undo it when user
909 * mode accesses the page again.
910 */
911 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
912 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
913 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
914 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
915 && CPUMGetGuestCPL(pVCpu) == 3
916 && pVM->cCpus == 1
917 )
918 {
919 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
920 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
921 if (RT_SUCCESS(rc))
922 {
923 PGM_INVL_PG(pVCpu, pvFault);
924 pVCpu->pgm.s.cNetwareWp0Hacks--;
925 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
926 return VINF_SUCCESS;
927 }
928 }
929# endif /* PGM_WITH_PAGING */
930
931 /** @todo else: why are we here? */
932
933# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
934 /*
935 * Check for VMM page flags vs. Guest page flags consistency.
936 * Currently only for debug purposes.
937 */
938 if (RT_SUCCESS(rc))
939 {
940 /* Get guest page flags. */
941 PGMPTWALK GstPageWalk;
942 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
943 if (RT_SUCCESS(rc2))
944 {
945 uint64_t fPageShw = 0;
946 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
947
948#if 0
949 /*
950 * Compare page flags.
951 * Note: we have AVL, A, D bits desynced.
952 */
953 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
954 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
955 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
956 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
957 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
958 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
959 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
960 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
961 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
96201:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
96301:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
964
96501:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
96601:01:15.625516 00:08:43.268051 Location :
967e:\vbox\svn\trunk\srcPage flags mismatch!
968pvFault=fffff801b0d7b000
969 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
970GCPhys=0000000019b52000
971fPageShw=0
972fPageGst=77b0000000000121
973rc=0
974#endif
975
976 }
977 else
978 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
979 }
980 else
981 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
982# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
983 }
984
985
986 /*
987 * If we get here it is because something failed above, i.e. most likely guru
988 * meditation time.
989 */
990 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
991 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pCtx->cs.Sel, pCtx->rip));
992 return rc;
993
994# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
995 NOREF(uErr); NOREF(pCtx); NOREF(pvFault);
996 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
997 return VERR_PGM_NOT_USED_IN_MODE;
998# endif
999}
1000
1001
1002# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
1003/**
1004 * Deals with a nested-guest \#PF fault for a guest-physical page with a handler.
1005 *
1006 * @returns Strict VBox status code.
1007 * @param pVCpu The cross context virtual CPU structure.
1008 * @param uErr The error code.
1009 * @param pCtx Pointer to the register context for the CPU.
1010 * @param GCPhysNestedFault The nested-guest physical address of the fault.
1011 * @param pPage The guest page at @a GCPhysNestedFault.
1012 * @param GCPhysFault The guest-physical address of the fault.
1013 * @param pGstWalkAll The guest page walk result.
1014 * @param pfLockTaken Where to store whether the PGM is still held when
1015 * this function completes.
1016 *
1017 * @note The caller has taken the PGM lock.
1018 */
1019static VBOXSTRICTRC PGM_BTH_NAME(NestedTrap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx,
1020 RTGCPHYS GCPhysNestedFault, PPGMPAGE pPage,
1021 RTGCPHYS GCPhysFault, PPGMPTWALKGST pGstWalkAll,
1022 bool *pfLockTaken)
1023{
1024# if PGM_GST_TYPE == PGM_TYPE_PROT \
1025 && PGM_SHW_TYPE == PGM_TYPE_EPT
1026
1027 /** @todo Assert uErr isn't X86_TRAP_PF_RSVD and remove release checks. */
1028 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysFault);
1029 AssertMsgReturn(PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage), ("%RGp %RGp uErr=%u\n", GCPhysNestedFault, GCPhysFault, uErr),
1030 VERR_PGM_HANDLER_IPE_1);
1031
1032 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1033 RTGCPHYS const GCPhysNestedPage = GCPhysNestedFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1034 RTGCPHYS const GCPhysPage = GCPhysFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1035
1036 /*
1037 * Physical page access handler.
1038 */
1039 PPGMPHYSHANDLER pCur;
1040 VBOXSTRICTRC rcStrict = pgmHandlerPhysicalLookup(pVM, GCPhysPage, &pCur);
1041 AssertRCReturn(VBOXSTRICTRC_VAL(rcStrict), rcStrict);
1042
1043 PCPGMPHYSHANDLERTYPEINT const pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
1044 Assert(pCurType);
1045
1046 /*
1047 * If the region is write protected and we got a page not present fault, then sync
1048 * the pages. If the fault was caused by a read, then restart the instruction.
1049 * In case of write access continue to the GC write handler.
1050 */
1051 if ( !(uErr & X86_TRAP_PF_P)
1052 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
1053 {
1054 Log7Func(("Syncing Monitored: GCPhysNestedPage=%RGp GCPhysPage=%RGp uErr=%#x\n", GCPhysNestedPage, GCPhysPage, uErr));
1055 rcStrict = PGM_BTH_NAME(NestedSyncPage)(pVCpu, GCPhysNestedPage, GCPhysPage, 1 /*cPages*/, uErr, pGstWalkAll);
1056 Assert(rcStrict != VINF_PGM_SYNCPAGE_MODIFIED_PDE);
1057 if ( RT_FAILURE(rcStrict)
1058 || !(uErr & X86_TRAP_PF_RW))
1059 {
1060 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
1061 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
1062 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
1063 return rcStrict;
1064 }
1065 }
1066 else if ( !(uErr & X86_TRAP_PF_RSVD)
1067 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE)
1068 {
1069 /*
1070 * If the access was NOT through an EPT misconfig (i.e. RSVD), sync the page.
1071 * This can happen for the VMX APIC-access page.
1072 */
1073 Log7Func(("Syncing MMIO: GCPhysNestedPage=%RGp GCPhysPage=%RGp\n", GCPhysNestedPage, GCPhysPage));
1074 rcStrict = PGM_BTH_NAME(NestedSyncPage)(pVCpu, GCPhysNestedPage, GCPhysPage, 1 /*cPages*/, uErr, pGstWalkAll);
1075 Assert(rcStrict != VINF_PGM_SYNCPAGE_MODIFIED_PDE);
1076 if (RT_FAILURE(rcStrict))
1077 {
1078 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
1079 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
1080 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
1081 return rcStrict;
1082 }
1083 }
1084
1085 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
1086 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
1087 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
1088 GCPhysNestedFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
1089 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
1090 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
1091 else
1092 {
1093 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
1094 if (uErr & X86_TRAP_PF_RSVD)
1095 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
1096 }
1097
1098 if (pCurType->pfnPfHandler)
1099 {
1100 STAM_PROFILE_START(&pCur->Stat, h);
1101 uint64_t const uUser = !pCurType->fRing0DevInsIdx ? pCur->uUser
1102 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser);
1103
1104 if (pCurType->fKeepPgmLock)
1105 {
1106 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pCtx, GCPhysNestedFault, GCPhysFault, uUser);
1107 STAM_PROFILE_STOP(&pCur->Stat, h);
1108 }
1109 else
1110 {
1111 PGM_UNLOCK(pVM);
1112 *pfLockTaken = false;
1113 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pCtx, GCPhysNestedFault, GCPhysFault, uUser);
1114 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
1115 }
1116 }
1117 else
1118 {
1119 AssertMsgFailed(("What's going on here!? Fault falls outside handler range!?\n"));
1120 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
1121 }
1122
1123 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
1124 return rcStrict;
1125
1126# else
1127 RT_NOREF8(pVCpu, uErr, pCtx, GCPhysNestedFault, pPage, GCPhysFault, pGstWalkAll, pfLockTaken);
1128 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1129 return VERR_PGM_NOT_USED_IN_MODE;
1130# endif
1131}
1132# endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
1133
1134
1135/**
1136 * Nested \#PF handler for nested-guest hardware-assisted execution using nested
1137 * paging.
1138 *
1139 * @returns VBox status code (appropriate for trap handling and GC return).
1140 * @param pVCpu The cross context virtual CPU structure.
1141 * @param uErr The fault error (X86_TRAP_PF_*).
1142 * @param pCtx Pointer to the register context for the CPU.
1143 * @param GCPhysNestedFault The nested-guest physical address of the fault.
1144 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
1145 * caused this fault. If @c false, GCPtrNestedFault
1146 * must be 0.
1147 * @param GCPtrNestedFault The nested-guest linear address of this fault.
1148 * @param pWalk The guest page table walk result.
1149 * @param pfLockTaken Where to store whether the PGM lock is still held
1150 * when this function completes.
1151 */
1152PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTX pCtx, RTGCPHYS GCPhysNestedFault,
1153 bool fIsLinearAddrValid, RTGCPTR GCPtrNestedFault, PPGMPTWALK pWalk, bool *pfLockTaken)
1154{
1155 *pfLockTaken = false;
1156# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) \
1157 && PGM_GST_TYPE == PGM_TYPE_PROT \
1158 && PGM_SHW_TYPE == PGM_TYPE_EPT
1159
1160 Assert(CPUMIsGuestVmxEptPagingEnabled(pVCpu));
1161 Assert(PGM_A20_IS_ENABLED(pVCpu));
1162
1163 /* We don't support mode-based execute control for EPT yet. */
1164 Assert(!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fVmxModeBasedExecuteEpt);
1165 Assert(!(uErr & X86_TRAP_PF_US));
1166
1167 /* Take the big lock now. */
1168 *pfLockTaken = true;
1169 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1170 PGM_LOCK_VOID(pVM);
1171
1172 /*
1173 * Walk the guest EPT tables and check if it's an EPT violation or misconfiguration.
1174 */
1175 if (fIsLinearAddrValid)
1176 Log7Func(("cs:rip=%04x:%#08RX64 GCPhysNestedFault=%RGp uErr=%#x GCPtrNestedFault=%RGv\n",
1177 pCtx->cs.Sel, pCtx->rip, GCPhysNestedFault, uErr, GCPtrNestedFault));
1178 else
1179 Log7Func(("cs:rip=%04x:%#08RX64 GCPhysNestedFault=%RGp uErr=%#x\n",
1180 pCtx->cs.Sel, pCtx->rip, GCPhysNestedFault, uErr));
1181 PGMPTWALKGST GstWalkAll;
1182 int rc = pgmGstSlatWalk(pVCpu, GCPhysNestedFault, fIsLinearAddrValid, GCPtrNestedFault, pWalk, &GstWalkAll);
1183 if (RT_FAILURE(rc))
1184 return rc;
1185
1186 Assert(GstWalkAll.enmType == PGMPTWALKGSTTYPE_EPT);
1187 Assert(pWalk->fSucceeded);
1188 Assert(pWalk->fEffective & (PGM_PTATTRS_EPT_R_MASK | PGM_PTATTRS_EPT_W_MASK | PGM_PTATTRS_EPT_X_SUPER_MASK));
1189 Assert(pWalk->fIsSlat);
1190
1191# ifdef DEBUG_ramshankar
1192 /* Paranoia. */
1193 Assert(RT_BOOL(pWalk->fEffective & PGM_PTATTRS_R_MASK) == RT_BOOL(pWalk->fEffective & PGM_PTATTRS_EPT_R_MASK));
1194 Assert(RT_BOOL(pWalk->fEffective & PGM_PTATTRS_W_MASK) == RT_BOOL(pWalk->fEffective & PGM_PTATTRS_EPT_W_MASK));
1195 Assert(RT_BOOL(pWalk->fEffective & PGM_PTATTRS_NX_MASK) == !RT_BOOL(pWalk->fEffective & PGM_PTATTRS_EPT_X_SUPER_MASK));
1196# endif
1197
1198 Log7Func(("SLAT: GCPhysNestedFault=%RGp -> GCPhys=%#RGp\n", GCPhysNestedFault, pWalk->GCPhys));
1199
1200 /*
1201 * Check page-access permissions.
1202 */
1203 if ( ((uErr & X86_TRAP_PF_RW) && !(pWalk->fEffective & PGM_PTATTRS_W_MASK))
1204 || ((uErr & X86_TRAP_PF_ID) && (pWalk->fEffective & PGM_PTATTRS_NX_MASK)))
1205 {
1206 Log7Func(("Permission failed! GCPtrNested=%RGv GCPhysNested=%RGp uErr=%#x fEffective=%#RX64\n", GCPtrNestedFault,
1207 GCPhysNestedFault, uErr, pWalk->fEffective));
1208 pWalk->fFailed = PGM_WALKFAIL_EPT_VIOLATION;
1209 return VERR_ACCESS_DENIED;
1210 }
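 /* Note: PGM_WALKFAIL_EPT_VIOLATION set above is intended to let the caller turn the
    VERR_ACCESS_DENIED into an EPT-violation VM-exit for the nested guest. */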
1211
1212 PGM_A20_ASSERT_MASKED(pVCpu, pWalk->GCPhys);
1213 RTGCPHYS const GCPhysPage = pWalk->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1214 RTGCPHYS const GCPhysNestedPage = GCPhysNestedFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1215
1216 /*
1217 * If we were called via an EPT misconfig, it should've already resulted in a nested-guest VM-exit.
1218 */
1219 AssertMsgReturn(!(uErr & X86_TRAP_PF_RSVD),
1220 ("Unexpected EPT misconfig VM-exit. GCPhysPage=%RGp GCPhysNestedPage=%RGp\n", GCPhysPage, GCPhysNestedPage),
1221 VERR_PGM_MAPPING_IPE);
1222
1223 /*
1224 * Fetch and sync the nested-guest EPT page directory pointer.
1225 */
1226 PEPTPD pEptPd;
1227 rc = pgmShwGetNestedEPTPDPtr(pVCpu, GCPhysNestedPage, NULL /*ppPdpt*/, &pEptPd, &GstWalkAll);
1228 AssertRCReturn(rc, rc);
1229 Assert(pEptPd);
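 /* pEptPd now points to the shadow (nested) EPT page directory covering
    GCPhysNestedPage; iPde computed below indexes the relevant PDE within it. */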
1230
1231 /*
1232 * A common case is the not-present error caused by lazy page table syncing.
1233 *
1234 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
1235 * here so we can safely assume that the shadow PT is present when calling
1236 * NestedSyncPage later.
1237 *
1238 * NOTE: It's possible we will be syncing the VMX APIC-access page here.
1239 * In that case, we would sync the page but will NOT go ahead with emulating
1240 * the APIC-access VM-exit through IEM. However, once the page is mapped in
1241 * the shadow tables, subsequent APIC-access VM-exits for the nested-guest
1242 * will be triggered by hardware. Maybe calling the IEM #PF handler can be
1243 * considered as an optimization later.
1244 */
1245 unsigned const iPde = (GCPhysNestedPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1246 if ( !(uErr & X86_TRAP_PF_P)
1247 && !(pEptPd->a[iPde].u & EPT_PRESENT_MASK))
1248 {
1249 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
1250 Log7Func(("NestedSyncPT: Lazy. GCPhysNestedPage=%RGp GCPhysPage=%RGp\n", GCPhysNestedPage, GCPhysPage));
1251 rc = PGM_BTH_NAME(NestedSyncPT)(pVCpu, GCPhysNestedPage, GCPhysPage, &GstWalkAll);
1252 if (RT_SUCCESS(rc))
1253 return rc;
1254 AssertMsgFailedReturn(("NestedSyncPT: %RGv failed! rc=%Rrc\n", GCPhysNestedPage, rc), VERR_PGM_MAPPING_IPE);
1255 }
1256
1257 /*
1258 * Check if this fault address is flagged for special treatment.
1259 * This handles faults on an MMIO or write-monitored page.
1260 *
1261 * If this happens to be the VMX APIC-access page, we don't treat it as MMIO
1262 * but rather sync it further below (as a regular guest page) which lets
1263 * hardware-assisted execution trigger the APIC-access VM-exits of the
1264 * nested-guest directly.
1265 */
1266 PPGMPAGE pPage;
1267 rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1268 if (RT_FAILURE(rc))
1269 {
1270 /*
1271 * We failed to get the physical page which means it's a reserved/invalid
1272 * page address (not MMIO even). This can typically be observed with
1273 * Microsoft Hyper-V enabled Windows guests. We must fall back to emulating
1274 * the instruction, see @bugref{10318#c7}.
1275 */
1276 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
1277 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
1278 return VINF_EM_RAW_EMULATE_INSTR;
1279 }
1280 /* Check if this is an MMIO page and NOT the VMX APIC-access page. */
1281 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
1282 {
1283 Log7Func(("MMIO: Calling NestedTrap0eHandlerDoAccessHandlers for GCPhys %RGp\n", GCPhysPage));
1284 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(NestedTrap0eHandlerDoAccessHandlers)(pVCpu, uErr, pCtx, GCPhysNestedFault,
1285 pPage, pWalk->GCPhys, &GstWalkAll,
1286 pfLockTaken));
1287 }
1288
1289 /*
1290 * We are here only if the page is present in the nested-guest page tables but the
1291 * trap is not handled by our handlers. Check for a page out-of-sync situation.
1292 */
1293 if (!(uErr & X86_TRAP_PF_P))
1294 {
1295 Assert(!PGM_PAGE_IS_BALLOONED(pPage));
1296 Assert(!(uErr & X86_TRAP_PF_US)); /* Mode-based execute not supported yet. */
1297 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
1298
1299 Log7Func(("SyncPage: Not-Present: GCPhysNestedPage=%RGp GCPhysPage=%RGp\n", GCPhysNestedFault, GCPhysPage));
1300 rc = PGM_BTH_NAME(NestedSyncPage)(pVCpu, GCPhysNestedPage, GCPhysPage, PGM_SYNC_NR_PAGES, uErr, &GstWalkAll);
1301 if (RT_SUCCESS(rc))
1302 {
1303 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
1304 return VINF_SUCCESS;
1305 }
1306 }
1307 else if (uErr & X86_TRAP_PF_RW)
1308 {
1309 /*
1310          * Write-protected pages are made writable when the guest makes the
1311          * first write to them. This happens for pages that are shared, write
1312          * monitored or not yet allocated.
1313 *
1314 * We may also end up here when CR0.WP=0 in the guest.
1315 *
1316          * Also, a side effect of not flushing global PDEs is out-of-sync
1317          * pages due to physically monitored regions that are no longer valid.
1318          * Assume for now it only applies to the read/write flag.
1319 */
1320 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1321 {
1322 /* This is a read-only page. */
1323 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhysPage));
1324#ifdef PGM_WITH_PAGE_ZEROING_DETECTION
1325 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ZERO
1326 && (GCPhysNestedFault & X86_PAGE_OFFSET_MASK) == 0
1327 && pgmHandlePageZeroingCode(pVCpu, pCtx))
1328 {
1329 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2PageZeroing; });
1330 return VINF_SUCCESS;
1331 }
1332#endif
1333 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
1334
1335 Log7Func(("Calling pgmPhysPageMakeWritable for GCPhysPage=%RGp\n", GCPhysPage));
1336 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1337 if (rc != VINF_SUCCESS)
1338 {
1339 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
1340 return rc;
1341 }
1342 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
1343 return VINF_EM_NO_MEMORY;
1344 }
1345
1346 Assert(!(uErr & X86_TRAP_PF_US)); /* Mode-based execute not supported yet. */
1347 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1348
1349 /*
1350 * Sync the write-protected page.
1351 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1352 * page is not present, which is not true in this case.
1353 */
1354 Log7Func(("SyncPage: RW: cs:rip=%04x:%#RX64 GCPhysNestedPage=%RGp uErr=%#RX32 GCPhysPage=%RGp WalkGCPhys=%RGp\n",
1355 pCtx->cs.Sel, pCtx->rip, GCPhysNestedPage, (uint32_t)uErr, GCPhysPage, pWalk->GCPhys));
1356 rc = PGM_BTH_NAME(NestedSyncPage)(pVCpu, GCPhysNestedPage, GCPhysPage, 1 /* cPages */, uErr, &GstWalkAll);
1357 if (RT_SUCCESS(rc))
1358 {
1359 HMInvalidatePhysPage(pVM, GCPhysPage);
1360 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
1361 return VINF_SUCCESS;
1362 }
1363 }
1364
1365 /*
1366 * If we get here it is because something failed above => guru meditation time?
1367 */
1368 LogRelMaxFunc(32, ("rc=%Rrc GCPhysNestedFault=%#RGp (%#RGp) uErr=%#RX32 cs:rip=%04x:%08RX64\n",
1369 rc, GCPhysNestedFault, GCPhysPage, (uint32_t)uErr, pCtx->cs.Sel, pCtx->rip));
1370 return VERR_PGM_MAPPING_IPE;
1371
1372# else /* !VBOX_WITH_NESTED_HWVIRT_VMX_EPT || PGM_GST_TYPE != PGM_TYPE_PROT || PGM_SHW_TYPE != PGM_TYPE_EPT */
1373 RT_NOREF7(pVCpu, uErr, pCtx, GCPhysNestedFault, fIsLinearAddrValid, GCPtrNestedFault, pWalk);
1374 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1375 return VERR_PGM_NOT_USED_IN_MODE;
1376# endif
1377}
1378
1379#endif /* !IN_RING3 */
1380
1381
1382/**
1383 * Emulation of the invlpg instruction.
1384 *
1385 *
1386 * @returns VBox status code.
1387 *
1388 * @param pVCpu The cross context virtual CPU structure.
1389 * @param GCPtrPage Page to invalidate.
1390 *
1391 * @remark ASSUMES that the guest is updating before invalidating. This order
1392 * isn't required by the CPU, so this is speculative and could cause
1393 * trouble.
1394 * @remark No TLB shootdown is done on any other VCPU as we assume that
1395 * invlpg emulation is the *only* reason for calling this function.
1396 * (The guest has to shoot down TLB entries on other CPUs itself)
1397 * Currently true, but keep in mind!
1398 *
1399 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1400 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1401 */
1402PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1403{
1404#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1405 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1406 && PGM_SHW_TYPE != PGM_TYPE_NONE
1407 int rc;
1408 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1409 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1410
1411 PGM_LOCK_ASSERT_OWNER(pVM);
1412
1413 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1414
1415 /*
1416 * Get the shadow PD entry and skip out if this PD isn't present.
1417 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1418 */
1419# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1420 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1421 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1422 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR_3);
1423
1424 /* Fetch the pgm pool shadow descriptor. */
1425 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1426# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1427 if (!pShwPde)
1428 {
1429 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1430 return VINF_SUCCESS;
1431 }
1432# else
1433 Assert(pShwPde);
1434# endif
1435
1436# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1437 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1438 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1439
1440 /* If the shadow PDPE isn't present, then skip the invalidate. */
1441# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1442 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1443# else
1444 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1445# endif
1446 {
1447 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1448 PGM_INVL_PG(pVCpu, GCPtrPage);
1449 return VINF_SUCCESS;
1450 }
1451
1452 /* Fetch the pgm pool shadow descriptor. */
1453 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1454 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1455
1456 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1457 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1458 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1459
1460# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1461 /* PML4 */
1462 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1463 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1464 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1465 PX86PDPAE pPDDst;
1466 PX86PDPT pPdptDst;
1467 PX86PML4E pPml4eDst;
1468 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1469 if (rc != VINF_SUCCESS)
1470 {
1471 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1472 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1473 PGM_INVL_PG(pVCpu, GCPtrPage);
1474 return VINF_SUCCESS;
1475 }
1476 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1477 Assert(pPDDst);
1478 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1479
1480 /* Fetch the pgm pool shadow descriptor. */
1481 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1482 Assert(pShwPde);
1483
1484# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1485
1486 const SHWPDE PdeDst = *pPdeDst;
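         /* If the shadow PDE isn't present there is nothing to invalidate for this address. */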
1487 if (!(PdeDst.u & X86_PDE_P))
1488 {
1489 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1490 PGM_INVL_PG(pVCpu, GCPtrPage);
1491 return VINF_SUCCESS;
1492 }
1493
1494 /*
1495 * Get the guest PD entry and calc big page.
1496 */
1497# if PGM_GST_TYPE == PGM_TYPE_32BIT
1498 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1499 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1500 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1501# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1502 unsigned iPDSrc = 0;
1503# if PGM_GST_TYPE == PGM_TYPE_PAE
1504 X86PDPE PdpeSrcIgn;
1505 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1506# else /* AMD64 */
1507 PX86PML4E pPml4eSrcIgn;
1508 X86PDPE PdpeSrcIgn;
1509 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1510# endif
1511 GSTPDE PdeSrc;
1512
1513 if (pPDSrc)
1514 PdeSrc = pPDSrc->a[iPDSrc];
1515 else
1516 PdeSrc.u = 0;
1517# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1518 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1519 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1520 if (fWasBigPage != fIsBigPage)
1521 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1522
1523# ifdef IN_RING3
1524 /*
1525 * If a CR3 Sync is pending we may ignore the invalidate page operation
1526 * depending on the kind of sync and if it's a global page or not.
1527 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1528 */
1529# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1530 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1531 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1532 && fIsBigPage
1533 && (PdeSrc.u & X86_PDE4M_G)
1534 )
1535 )
1536# else
1537 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1538# endif
1539 {
1540 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1541 return VINF_SUCCESS;
1542 }
1543# endif /* IN_RING3 */
1544
1545 /*
1546 * Deal with the Guest PDE.
1547 */
1548 rc = VINF_SUCCESS;
1549 if (PdeSrc.u & X86_PDE_P)
1550 {
1551 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1552 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1553 if (!fIsBigPage)
1554 {
1555 /*
1556 * 4KB - page.
1557 */
1558 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1559 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1560
1561# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1562 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1563 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1564# endif
1565 if (pShwPage->GCPhys == GCPhys)
1566 {
1567 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1568 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1569
1570 PGSTPT pPTSrc;
1571 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1572 if (RT_SUCCESS(rc))
1573 {
1574 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1575 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1576 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1577 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1578 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1579 GCPtrPage, PteSrc.u & X86_PTE_P,
1580 (PteSrc.u & PdeSrc.u & X86_PTE_RW),
1581 (PteSrc.u & PdeSrc.u & X86_PTE_US),
1582 (uint64_t)PteSrc.u,
1583 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1584 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1585 }
1586 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1587 PGM_INVL_PG(pVCpu, GCPtrPage);
1588 }
1589 else
1590 {
1591 /*
1592 * The page table address changed.
1593 */
1594 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1595 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1596 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1597 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1598 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1599 PGM_INVL_VCPU_TLBS(pVCpu);
1600 }
1601 }
1602 else
1603 {
1604 /*
1605 * 2/4MB - page.
1606 */
1607 /* Before freeing the page, check if anything really changed. */
1608 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1609 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1610# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1611 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1612 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1613# endif
1614 if ( pShwPage->GCPhys == GCPhys
1615 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1616 {
1617             /* ASSUMES the given bits are identical for 4M and normal PDEs */
1618 /** @todo This test is wrong as it cannot check the G bit!
1619 * FIXME */
1620 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1621 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1622 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1623 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1624 {
1625 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1626 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1627 return VINF_SUCCESS;
1628 }
1629 }
1630
1631 /*
1632 * Ok, the page table is present and it's been changed in the guest.
1633 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1634              * We could do this for some flushes in GC too, but we need an algorithm for
1635              * deciding which 4MB pages contain code that is likely to be executed very soon.
1636 */
1637 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1638 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1639 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1640 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1641 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1642 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1643 }
1644 }
1645 else
1646 {
1647 /*
1648 * Page directory is not present, mark shadow PDE not present.
1649 */
1650 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1651 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1652 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1653 PGM_INVL_PG(pVCpu, GCPtrPage);
1654 }
1655 return rc;
1656
1657#else /* guest real and protected mode, nested + ept, none. */
1658 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1659 NOREF(pVCpu); NOREF(GCPtrPage);
1660 return VINF_SUCCESS;
1661#endif
1662}
1663
1664#if PGM_SHW_TYPE != PGM_TYPE_NONE
1665
1666/**
1667 * Update the tracking of shadowed pages.
1668 *
1669 * @param pVCpu The cross context virtual CPU structure.
1670 * @param pShwPage The shadow page.
1671  * @param   HCPhys      The physical page which is being dereferenced.
1672 * @param iPte Shadow PTE index
1673 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1674 */
1675DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1676 RTGCPHYS GCPhysPage)
1677{
1678 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1679
1680# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1681 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1682 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1683
1684 /* Use the hint we retrieved from the cached guest PT. */
1685 if (pShwPage->fDirty)
1686 {
1687 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1688
1689 Assert(pShwPage->cPresent);
1690 Assert(pPool->cPresent);
1691 pShwPage->cPresent--;
1692 pPool->cPresent--;
1693
1694 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1695 AssertRelease(pPhysPage);
1696 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1697 return;
1698 }
1699# else
1700 NOREF(GCPhysPage);
1701# endif
1702
1703 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1704 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1705 * 2. write protect all shadowed pages. I.e. implement caching.
1706 *
1707 * 2023-08-24 bird: If we allow the ZeroPg to enter the shadow page tables,
1708      * this becomes a common occurrence and we screw up. A better approach than the
1709      * above would be to have a parallel table that records the guest physical
1710      * addresses of the pages mapped by the shadow page table... For nested page
1711      * tables, we can easily correlate a table entry to a page entry, so it won't be
1712 * needed for those.
1713 */
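         /* A minimal sketch of the parallel table idea from the @todo above; purely
          * illustrative and hypothetical (the type name and field are made up and are
          * not part of PGM):
          *      typedef struct PGMPOOLPAGEGCPHYSMAP
          *      {
          *          RTGCPHYS aGCPhys[SHW_PT_MASK + 1];  // guest-physical address backing each shadow PTE
          *      } PGMPOOLPAGEGCPHYSMAP;
          * With one such table per shadow page table, the RAM range scan at the bottom of
          * this function could become a single array lookup keyed on iPte. */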
1714# if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1715 /*
1716      * For non-paged guest tables, EPT and nested tables, we can figure out the
1717      * physical page corresponding to the entry and dereference it.
1718      * (This ASSUMES that shadow PTs won't ever be used out of place.)
1719 */
1720 if ( pShwPage->enmKind == PGMPOOLKIND_EPT_PT_FOR_PHYS
1721 || pShwPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PHYS
1722 || pShwPage->enmKind == PGMPOOLKIND_32BIT_PT_FOR_PHYS)
1723 {
1724 RTGCPHYS GCPhysNestedEntry = pShwPage->GCPhys + ((uint32_t)iPte << X86_PAGE_SHIFT);
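             /* Apply the A20 gate mask (clear address line 20) when A20 is disabled for this shadow page. */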
1725 if (!pShwPage->fA20Enabled)
1726 GCPhysNestedEntry &= ~(uint64_t)RT_BIT_64(20);
1727 PPGMPAGE const pPhysPage = pgmPhysGetPage(pVM, GCPhysNestedEntry);
1728 AssertRelease(pPhysPage);
1729 pgmTrackDerefGCPhys(pVM->pgm.s.CTX_SUFF(pPool), pShwPage, pPhysPage, iPte);
1730 }
1731 else
1732 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pShwPage->enmKind, pShwPage->GCPhys));
1733# endif
1734
1735 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1736
1737 /*
1738 * Find the guest address.
1739 */
1740 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1741 LogFlow(("SyncPageWorkerTrackDeref(%d,%d): Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n",
1742 PGM_SHW_TYPE, PGM_GST_TYPE, HCPhys, pShwPage->idx));
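         /* Brute force fallback: walk every RAM range and every page therein looking for the host-physical address. */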
1743 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
1744 Assert(pVM->pgm.s.apRamRanges[0] == NULL);
1745 for (uint32_t idx = 1; idx <= idRamRangeMax; idx++)
1746 {
1747 PPGMRAMRANGE const pRam = pVM->CTX_EXPR(pgm, pgmr0, pgm).s.apRamRanges[idx];
1748 AssertContinue(pRam);
1749 unsigned iPage = pRam->cb >> GUEST_PAGE_SHIFT;
1750 while (iPage-- > 0)
1751 {
1752 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1753 {
1754 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1755
1756 Assert(pShwPage->cPresent);
1757 Assert(pPool->cPresent);
1758 pShwPage->cPresent--;
1759 pPool->cPresent--;
1760
1761 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1762 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1763 return;
1764 }
1765 }
1766 }
1767
1768 for (;;)
1769 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1770}
1771
1772
1773/**
1774 * Update the tracking of shadowed pages.
1775 *
1776 * @param pVCpu The cross context virtual CPU structure.
1777 * @param pShwPage The shadow page.
1778 * @param u16 The top 16-bit of the pPage->HCPhys.
1779  * @param   pPage       Pointer to the guest page. This will be modified.
1780 * @param iPTDst The index into the shadow table.
1781 */
1782DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1783 PPGMPAGE pPage, const unsigned iPTDst)
1784{
1785 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1786
1787 /*
1788 * Just deal with the simple first time here.
1789 */
1790 if (!u16)
1791 {
1792 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1793 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1794 /* Save the page table index. */
1795 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1796 }
1797 else
1798 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1799
1800 /* write back */
1801 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x pPage=%p\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst, pPage));
1802 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1803
1804 /* update statistics. */
1805 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1806 pShwPage->cPresent++;
1807 if (pShwPage->iFirstPresent > iPTDst)
1808 pShwPage->iFirstPresent = iPTDst;
1809}
1810
1811
1812/**
1813 * Modifies a shadow PTE to account for access handlers.
1814 *
1815 * @param pVM The cross context VM structure.
1816 * @param pVCpu The cross context virtual CPU structure.
1817 * @param pPage The page in question.
1818 * @param GCPhysPage The guest-physical address of the page.
1819 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1820 * A (accessed) bit so it can be emulated correctly.
1821 * @param pPteDst The shadow PTE (output). This is temporary storage and
1822 * does not need to be set atomically.
1823 */
1824DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PVMCPUCC pVCpu, PCPGMPAGE pPage, RTGCPHYS GCPhysPage, uint64_t fPteSrc,
1825 PSHWPTE pPteDst)
1826{
1827 RT_NOREF_PV(pVM); RT_NOREF_PV(fPteSrc); RT_NOREF_PV(pVCpu); RT_NOREF_PV(GCPhysPage);
1828
1829 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1830 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1831 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1832 {
1833 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1834# if PGM_SHW_TYPE == PGM_TYPE_EPT
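             /* EPT: map the page without EPT_E_WRITE so guest writes still trigger the access handler. */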
1835 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1836# else
1837 if (fPteSrc & X86_PTE_A)
1838 {
1839 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1840 SHW_PTE_SET_RO(*pPteDst);
1841 }
1842 else
1843 SHW_PTE_SET(*pPteDst, 0);
1844# endif
1845 }
1846# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1847# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1848 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1849 && ( BTH_IS_NP_ACTIVE(pVM)
1850 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1851# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1852 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1853# endif
1854 )
1855 {
1856 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1857# if PGM_SHW_TYPE == PGM_TYPE_EPT
1858 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1859 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1860 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1861 | EPT_E_WRITE
1862 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1863 | EPT_E_MEMTYPE_INVALID_3;
1864# else
1865 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1866 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1867# endif
1868 }
1869# endif
1870# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1871 else
1872 {
1873 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1874 SHW_PTE_SET(*pPteDst, 0);
1875 }
1876 /** @todo count these kinds of entries. */
1877}
1878
1879
1880/**
1881 * Creates a 4K shadow page for a guest page.
1882 *
1883  * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1884  * physical address. Of the PdeSrc argument only the flags are used. No page
1885  * structures will be mapped in this function.
1886 *
1887 * @param pVCpu The cross context virtual CPU structure.
1888 * @param pPteDst Destination page table entry.
1889 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1890 * Can safely assume that only the flags are being used.
1891 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1892 * @param pShwPage Pointer to the shadow page.
1893 * @param iPTDst The index into the shadow table.
1894 *
1895 * @remark Not used for 2/4MB pages!
1896 */
1897# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1898static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1899 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1900# else
1901static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1902 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1903# endif
1904{
1905 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1906 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1907
1908# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1909 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1910 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1911
1912 if (pShwPage->fDirty)
1913 {
1914 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1915 PGSTPT pGstPT;
1916
1917 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1918 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1919 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1920 pGstPT->a[iPTDst].u = PteSrc.u;
1921 }
1922# else
1923 Assert(!pShwPage->fDirty);
1924# endif
1925
1926# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1927 if ( (PteSrc.u & X86_PTE_P)
1928 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1929# endif
1930 {
1931# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1932 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1933# endif
1934 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1935
1936 /*
1937 * Find the ram range.
1938 */
1939 PPGMPAGE pPage;
1940 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1941 if (RT_SUCCESS(rc))
1942 {
1943 /* Ignore ballooned pages.
1944 Don't return errors or use a fatal assert here as part of a
1945                shadow sync range might include ballooned pages. */
1946 if (PGM_PAGE_IS_BALLOONED(pPage))
1947 {
1948 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1949 return;
1950 }
1951
1952# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1953 /* Make the page writable if necessary. */
1954 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1955 && ( PGM_PAGE_IS_ZERO(pPage)
1956# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1957 || ( (PteSrc.u & X86_PTE_RW)
1958# else
1959 || ( 1
1960# endif
1961 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1962# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1963 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1964# endif
1965# ifdef VBOX_WITH_PAGE_SHARING
1966 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1967# endif
1968 )
1969 )
1970 )
1971 {
1972 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1973 AssertRC(rc);
1974 }
1975# endif
1976
1977 /*
1978 * Make page table entry.
1979 */
1980 SHWPTE PteDst;
1981# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1982 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1983# else
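                 /* No guest paging: grant full access and treat the page as already accessed and dirty. */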
1984 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1985# endif
1986 if (!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) || PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
1987 {
1988# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1989 /*
1990 * If the page or page directory entry is not marked accessed,
1991 * we mark the page not present.
1992 */
1993 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1994 {
1995                     LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1996 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1997 SHW_PTE_SET(PteDst, 0);
1998 }
1999 /*
2000 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
2001 * when the page is modified.
2002 */
2003 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
2004 {
2005 AssertCompile(X86_PTE_RW == X86_PDE_RW);
2006 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
2007 SHW_PTE_SET(PteDst,
2008 fGstShwPteFlags
2009 | PGM_PAGE_GET_HCPHYS(pPage)
2010 | PGM_PTFLAGS_TRACK_DIRTY);
2011 SHW_PTE_SET_RO(PteDst);
2012 }
2013 else
2014# endif
2015 {
2016 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
2017# if PGM_SHW_TYPE == PGM_TYPE_EPT
2018 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
2019 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
2020# else
2021 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
2022# endif
2023 }
2024
2025 /*
2026 * Make sure only allocated pages are mapped writable.
2027 */
2028 if ( SHW_PTE_IS_P_RW(PteDst)
2029 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2030 {
2031# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2032 /* Still applies to shared pages. */
2033 Assert(!PGM_PAGE_IS_ZERO(pPage));
2034# endif
2035                     SHW_PTE_SET_RO(PteDst);    /** @todo this isn't quite working yet. Why isn't it? */
2036                     Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
2037 }
2038 }
2039 else
2040 PGM_BTH_NAME(SyncHandlerPte)(pVM, pVCpu, pPage, GCPhysPage, fGstShwPteFlags, &PteDst);
2041
2042 /*
2043 * Keep user track up to date.
2044 */
2045 if (SHW_PTE_IS_P(PteDst))
2046 {
2047 if (!SHW_PTE_IS_P(*pPteDst))
2048 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2049 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
2050 {
2051 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
2052 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
2053 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2054 }
2055 }
2056 else if (SHW_PTE_IS_P(*pPteDst))
2057 {
2058 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
2059 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
2060 }
2061
2062 /*
2063 * Update statistics and commit the entry.
2064 */
2065# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2066 if (!(PteSrc.u & X86_PTE_G))
2067 pShwPage->fSeenNonGlobal = true;
2068# endif
2069 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2070 return;
2071 }
2072
2073/** @todo count these three different kinds. */
2074 Log2(("SyncPageWorker: invalid address in Pte\n"));
2075 }
2076# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2077 else if (!(PteSrc.u & X86_PTE_P))
2078 Log2(("SyncPageWorker: page not present in Pte\n"));
2079 else
2080 Log2(("SyncPageWorker: invalid Pte\n"));
2081# endif
2082
2083 /*
2084 * The page is not present or the PTE is bad. Replace the shadow PTE by
2085 * an empty entry, making sure to keep the user tracking up to date.
2086 */
2087 if (SHW_PTE_IS_P(*pPteDst))
2088 {
2089 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
2090 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
2091 }
2092 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
2093}
2094
2095
2096/**
2097 * Syncs a guest OS page.
2098 *
2099 * There are no conflicts at this point, neither is there any need for
2100 * page table allocations.
2101 *
2102 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
2103 * When called in AMD64 guest mode, the guest PML4E shall be valid.
2104 *
2105 * @returns VBox status code.
2106 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
2107 * @param pVCpu The cross context virtual CPU structure.
2108 * @param PdeSrc Page directory entry of the guest.
2109 * @param GCPtrPage Guest context page address.
2110 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
2111 * @param uErr Fault error (X86_TRAP_PF_*).
2112 */
2113static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
2114{
2115 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2116 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2117 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
2118 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
2119
2120 PGM_LOCK_ASSERT_OWNER(pVM);
2121
2122# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2123 || PGM_GST_TYPE == PGM_TYPE_PAE \
2124 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2125 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
2126
2127 /*
2128 * Assert preconditions.
2129 */
2130 Assert(PdeSrc.u & X86_PDE_P);
2131 Assert(cPages);
2132# if 0 /* rarely useful; leave for debugging. */
2133 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
2134# endif
2135
2136 /*
2137 * Get the shadow PDE, find the shadow page table in the pool.
2138 */
2139# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2140 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2141 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2142 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR_3);
2143
2144 /* Fetch the pgm pool shadow descriptor. */
2145 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2146 Assert(pShwPde);
2147
2148# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2149 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2150 PPGMPOOLPAGE pShwPde = NULL;
2151 PX86PDPAE pPDDst;
2152
2153 /* Fetch the pgm pool shadow descriptor. */
2154 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2155 AssertRCSuccessReturn(rc2, rc2);
2156 Assert(pShwPde);
2157
2158 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2159 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
2160
2161# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2162 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2163 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2164 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2165 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2166
2167 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2168 AssertRCSuccessReturn(rc2, rc2);
2169 Assert(pPDDst && pPdptDst);
2170 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
2171# endif
2172 SHWPDE PdeDst = *pPdeDst;
2173
2174 /*
2175 * - In the guest SMP case we could have blocked while another VCPU reused
2176 * this page table.
2177 * - With W7-64 we may also take this path when the A bit is cleared on
2178 * higher level tables (PDPE/PML4E). The guest does not invalidate the
2179 * relevant TLB entries. If we're write monitoring any page mapped by
2180 * the modified entry, we may end up here with a "stale" TLB entry.
2181 */
2182 if (!(PdeDst.u & X86_PDE_P))
2183 {
2184 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2185 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
2186 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2187 if (uErr & X86_TRAP_PF_P)
2188 PGM_INVL_PG(pVCpu, GCPtrPage);
2189 return VINF_SUCCESS; /* force the instruction to be executed again. */
2190 }
2191
2192 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2193 Assert(pShwPage);
2194
2195# if PGM_GST_TYPE == PGM_TYPE_AMD64
2196 /* Fetch the pgm pool shadow descriptor. */
2197 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2198 Assert(pShwPde);
2199# endif
2200
2201 /*
2202 * Check that the page is present and that the shadow PDE isn't out of sync.
2203 */
2204 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
2205 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
2206 RTGCPHYS GCPhys;
2207 if (!fBigPage)
2208 {
2209 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2210# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2211 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2212 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2213# endif
2214 }
2215 else
2216 {
2217 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2218# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2219 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2220 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2221# endif
2222 }
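         /* The shadow PDE is in sync when it maps the same guest PT / big page and its P and US bits
            match the guest PDE, with compatible RW and NX bits. */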
2223 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
2224 if ( fPdeValid
2225 && pShwPage->GCPhys == GCPhys
2226 && (PdeSrc.u & X86_PDE_P)
2227 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
2228 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
2229# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2230 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
2231# endif
2232 )
2233 {
2234 /*
2235 * Check that the PDE is marked accessed already.
2236 * Since we set the accessed bit *before* getting here on a #PF, this
2237 * check is only meant for dealing with non-#PF'ing paths.
2238 */
2239 if (PdeSrc.u & X86_PDE_A)
2240 {
2241 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2242 if (!fBigPage)
2243 {
2244 /*
2245 * 4KB Page - Map the guest page table.
2246 */
2247 PGSTPT pPTSrc;
2248 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2249 if (RT_SUCCESS(rc))
2250 {
2251# ifdef PGM_SYNC_N_PAGES
2252 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2253 if ( cPages > 1
2254 && !(uErr & X86_TRAP_PF_P)
2255 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2256 {
2257 /*
2258 * This code path is currently only taken when the caller is PGMTrap0eHandler
2259 * for non-present pages!
2260 *
2261 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2262 * deal with locality.
2263 */
2264 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2265# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2266 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2267 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2268# else
2269 const unsigned offPTSrc = 0;
2270# endif
2271 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2272 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2273 iPTDst = 0;
2274 else
2275 iPTDst -= PGM_SYNC_NR_PAGES / 2;
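                             /* E.g. assuming PGM_SYNC_NR_PAGES is 8: a fault at entry 2 yields the
                                window [0..6), while a fault at entry 100 yields [96..104). */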
2276
2277 for (; iPTDst < iPTDstEnd; iPTDst++)
2278 {
2279 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
2280
2281 if ( (pPteSrc->u & X86_PTE_P)
2282 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2283 {
2284 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT))
2285 | ((offPTSrc + iPTDst) << GUEST_PAGE_SHIFT);
2286 NOREF(GCPtrCurPage);
2287 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
2288 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2289 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
2290 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
2291 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
2292 (uint64_t)pPteSrc->u,
2293 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2294 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2295 }
2296 }
2297 }
2298 else
2299# endif /* PGM_SYNC_N_PAGES */
2300 {
2301 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2302 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2303 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2304 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2305 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2306 GCPtrPage, PteSrc.u & X86_PTE_P,
2307 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2308 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2309 (uint64_t)PteSrc.u,
2310 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2311 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2312 }
2313 }
2314 else /* MMIO or invalid page: emulated in #PF handler. */
2315 {
2316 LogFlow(("PGM_GCPHYS_2_PTR_V2 %RGp failed with %Rrc\n", GCPhys, rc));
2317 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
2318 }
2319 }
2320 else
2321 {
2322 /*
2323 * 4/2MB page - lazy syncing shadow 4K pages.
2324 * (There are many causes of getting here, it's no longer only CSAM.)
2325 */
2326 /* Calculate the GC physical address of this 4KB shadow page. */
2327 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
2328 /* Find ram range. */
2329 PPGMPAGE pPage;
2330 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
2331 if (RT_SUCCESS(rc))
2332 {
2333 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2334
2335# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2336 /* Try to make the page writable if necessary. */
2337 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2338 && ( PGM_PAGE_IS_ZERO(pPage)
2339 || ( (PdeSrc.u & X86_PDE_RW)
2340 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2341# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2342 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2343# endif
2344# ifdef VBOX_WITH_PAGE_SHARING
2345 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2346# endif
2347 )
2348 )
2349 )
2350 {
2351 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2352 AssertRC(rc);
2353 }
2354# endif
2355
2356 /*
2357 * Make shadow PTE entry.
2358 */
2359 SHWPTE PteDst;
2360 if (!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) || PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
2361 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2362 else
2363 PGM_BTH_NAME(SyncHandlerPte)(pVM, pVCpu, pPage, GCPhys, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2364
2365 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2366 if ( SHW_PTE_IS_P(PteDst)
2367 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2368 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2369
2370 /* Make sure only allocated pages are mapped writable. */
2371 if ( SHW_PTE_IS_P_RW(PteDst)
2372 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2373 {
2374# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2375 /* Still applies to shared pages. */
2376 Assert(!PGM_PAGE_IS_ZERO(pPage));
2377# endif
2378 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2379 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2380 }
2381
2382 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2383
2384 /*
2385 * If the page is not flagged as dirty and is writable, then make it read-only
2386 * at PD level, so we can set the dirty bit when the page is modified.
2387 *
2388 * ASSUMES that page access handlers are implemented on page table entry level.
2389 * Thus we will first catch the dirty access and set PDE.D and restart. If
2390 * there is an access handler, we'll trap again and let it work on the problem.
2391 */
2392 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2393 * As for invlpg, it simply frees the whole shadow PT.
2394 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2395 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
2396 {
2397 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2398 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2399 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2400 }
2401 else
2402 {
2403 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
2404 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
2405 }
2406 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2407 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2408 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
2409 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2410 }
2411 else
2412 {
2413 LogFlow(("pgmPhysGetPageEx %RGp (big) failed with %Rrc\n", GCPhys, rc));
2414 /** @todo must wipe the shadow page table entry in this
2415 * case. */
2416 }
2417 }
2418 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2419 return VINF_SUCCESS;
2420 }
2421
2422 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
2423 }
2424 else if (fPdeValid)
2425 {
2426 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2427 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2428 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2429 }
2430 else
2431 {
2432/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2433 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2434 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2435 }
2436
2437 /*
2438 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2439 * Yea, I'm lazy.
2440 */
2441 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2442 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2443
2444 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2445 PGM_INVL_VCPU_TLBS(pVCpu);
2446 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2447
2448
2449# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2450 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2451 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2452 NOREF(PdeSrc);
2453
2454# ifdef PGM_SYNC_N_PAGES
2455 /*
2456 * Get the shadow PDE, find the shadow page table in the pool.
2457 */
2458# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2459 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2460
2461# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2462 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2463
2464# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2465 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2466 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2467 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2468 X86PDEPAE PdeDst;
2469 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2470
2471 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2472 AssertRCSuccessReturn(rc, rc);
2473 Assert(pPDDst && pPdptDst);
2474 PdeDst = pPDDst->a[iPDDst];
2475
2476# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2477 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2478 PEPTPD pPDDst;
2479 EPTPDE PdeDst;
2480
2481 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2482 if (rc != VINF_SUCCESS)
2483 {
2484 AssertRC(rc);
2485 return rc;
2486 }
2487 Assert(pPDDst);
2488 PdeDst = pPDDst->a[iPDDst];
2489# endif
2490 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2491 if (!SHW_PDE_IS_P(PdeDst))
2492 {
2493 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2494 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2495 return VINF_SUCCESS; /* force the instruction to be executed again. */
2496 }
2497
2498 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2499 if (SHW_PDE_IS_BIG(PdeDst))
2500 {
2501 Assert(pVM->pgm.s.fNestedPaging);
2502 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2503 return VINF_SUCCESS;
2504 }
2505
2506 /* Mask away the page offset. */
2507 GCPtrPage &= ~((RTGCPTR)0xfff);
2508
2509 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2510 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2511
2512 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2513 if ( cPages > 1
2514 && !(uErr & X86_TRAP_PF_P)
2515 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2516 {
2517 /*
2518 * This code path is currently only taken when the caller is PGMTrap0eHandler
2519 * for non-present pages!
2520 *
2521 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2522 * deal with locality.
2523 */
2524 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2525 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2526 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2527 iPTDst = 0;
2528 else
2529 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2530 for (; iPTDst < iPTDstEnd; iPTDst++)
2531 {
2532 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2533 {
2534 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2535 | (iPTDst << GUEST_PAGE_SHIFT));
2536
2537 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2538 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2539 GCPtrCurPage,
2540 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2541 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2542
2543 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2544 break;
2545 }
2546 else
2547 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n",
2548 (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << GUEST_PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2549 }
2550 }
2551 else
2552# endif /* PGM_SYNC_N_PAGES */
2553 {
2554 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2555 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2556 | (iPTDst << GUEST_PAGE_SHIFT));
2557
2558 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2559
2560         Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2561 GCPtrPage,
2562 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2563 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2564 }
2565 return VINF_SUCCESS;
2566
2567# else
2568 NOREF(PdeSrc);
2569 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2570 return VERR_PGM_NOT_USED_IN_MODE;
2571# endif
2572}
2573
2574#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2575
2576#if !defined(IN_RING3) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) && PGM_SHW_TYPE == PGM_TYPE_EPT
2577
2578/**
2579 * Sync a shadow page for a nested-guest page.
2580 *
2581 * @param pVCpu The cross context virtual CPU structure.
2582 * @param pPte The shadow page table entry.
2583 * @param GCPhysPage The guest-physical address of the page.
2584 * @param pShwPage The shadow page of the page table.
2585 * @param iPte The index of the page table entry.
2586 * @param pGstSlatPte The guest SLAT page table entry.
2587 *
2588 * @note Not to be used for 2/4MB pages!
2589 */
2590static void PGM_BTH_NAME(NestedSyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPte, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage,
2591 unsigned iPte, SLATPTE GstSlatPte)
2592{
2593 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
2594 Assert(PGMPOOL_PAGE_IS_NESTED(pShwPage));
2595 Assert(!pShwPage->fDirty);
2596 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_EPT);
2597 AssertMsg(!(GstSlatPte.u & EPT_E_LEAF), ("Large page unexpected: %RX64\n", GstSlatPte.u));
2598 AssertMsg((GstSlatPte.u & EPT_PTE_PG_MASK) == GCPhysPage,
2599 ("PTE address mismatch. GCPhysPage=%RGp Pte=%RX64\n", GCPhysPage, GstSlatPte.u & EPT_PTE_PG_MASK));
2600
2601 /*
2602 * Find the ram range.
2603 */
2604 PPGMPAGE pPage;
2605 int rc = pgmPhysGetPageEx(pVCpu->CTX_SUFF(pVM), GCPhysPage, &pPage);
2606 if (RT_SUCCESS(rc))
2607 { /* likely */ }
2608 else
2609 {
2610 /*
2611 * This is a RAM hole/invalid/reserved address (not MMIO).
2612 * Nested Microsoft Hyper-V maps addresses like 0xf0220000 as RW WB memory.
2613 * Shadow a not-present page similar to MMIO, see @bugref{10318#c7}.
2614 */
2615 Assert(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS);
2616 if (SHW_PTE_IS_P(*pPte))
2617 {
2618 Log2(("NestedSyncPageWorker: deref! *pPte=%RX64\n", SHW_PTE_LOG64(*pPte)));
2619 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPte), iPte, NIL_RTGCPHYS);
2620 }
2621 Log7Func(("RAM hole/reserved %RGp -> ShwPte=0\n", GCPhysPage));
2622 SHW_PTE_ATOMIC_SET(*pPte, 0);
2623 return;
2624 }
2625
2626 Assert(!PGM_PAGE_IS_BALLOONED(pPage));
2627
2628 /*
2629 * Make page table entry.
2630 */
2631 SHWPTE Pte;
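         /* Keep only the guest EPT PTE bits we shadow and force a write-back memory type, ignoring the guest PAT. */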
2632 uint64_t const fGstShwPteFlags = (GstSlatPte.u & pVCpu->pgm.s.fGstEptShadowedPteMask)
2633 | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
2634 if (!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) || PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
2635 {
2636# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2637 /* If it's the zero page or write to an unallocated page, allocate it to make it writable. */
2638 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2639 && ( PGM_PAGE_IS_ZERO(pPage)
2640 || ( (GstSlatPte.u & EPT_E_WRITE)
2641 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2642# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2643 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2644# endif
2645# ifdef VBOX_WITH_PAGE_SHARING
2646 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2647# endif
2648 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_BALLOONED
2649 )
2650 )
2651 )
2652 {
2653 rc = pgmPhysPageMakeWritable(pVCpu->CTX_SUFF(pVM), pPage, GCPhysPage);
2654 AssertRC(rc);
2655 Log7Func(("made writable (%R[pgmpage]) at %RGp\n", pPage, GCPhysPage));
2656 }
2657# endif
2658 /** @todo access bit. */
2659 Pte.u = PGM_PAGE_GET_HCPHYS(pPage) | fGstShwPteFlags;
2660 Log7Func(("regular page (%R[pgmpage]) at %RGp -> %RX64\n", pPage, GCPhysPage, Pte.u));
2661
2662 /* Make sure only allocated pages are mapped writable. */
2663 if ( (fGstShwPteFlags & EPT_E_WRITE)
2664 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2665 {
2666 Pte.u &= ~EPT_E_WRITE;
2667 Log7Func(("write-protecting page (%R[pgmpage]) at %RGp -> %RX64\n", pPage, GCPhysPage, Pte.u));
2668 }
2669 }
2670 else if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2671 {
2672 /** @todo access bit. */
2673 Pte.u = PGM_PAGE_GET_HCPHYS(pPage) | (fGstShwPteFlags & ~EPT_E_WRITE);
2674 Log7Func(("monitored page (%R[pgmpage]) at %RGp -> %RX64\n", pPage, GCPhysPage, Pte.u));
2675 }
2676 else
2677 {
2678 /** @todo Do MMIO optimizations here too? */
2679 Log7Func(("mmio/all page (%R[pgmpage]) at %RGp -> 0\n", pPage, GCPhysPage));
2680 Pte.u = 0;
2681 }
2682
2683 /* Make sure only allocated pages are mapped writable. */
2684 Assert(!SHW_PTE_IS_P_RW(Pte) || PGM_PAGE_IS_ALLOCATED(pPage));
2685
2686 /*
2687 * Keep user track up to date.
2688 */
2689 if (SHW_PTE_IS_P(Pte))
2690 {
2691 if (!SHW_PTE_IS_P(*pPte))
2692 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPte);
2693 else if (SHW_PTE_GET_HCPHYS(*pPte) != SHW_PTE_GET_HCPHYS(Pte))
2694 {
2695 Log2(("NestedSyncPageWorker: deref! *pPte=%RX64 Pte=%RX64\n", SHW_PTE_LOG64(*pPte), SHW_PTE_LOG64(Pte)));
2696 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPte), iPte, NIL_RTGCPHYS);
2697 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPte);
2698 }
2699 }
2700 else if (SHW_PTE_IS_P(*pPte))
2701 {
2702 Log2(("NestedSyncPageWorker: deref! *pPte=%RX64\n", SHW_PTE_LOG64(*pPte)));
2703 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPte), iPte, NIL_RTGCPHYS);
2704 }
2705
2706 /*
2707 * Commit the entry.
2708 */
2709 SHW_PTE_ATOMIC_SET2(*pPte, Pte);
2710 return;
2711}
2712
2713
2714/**
2715 * Syncs a nested-guest page.
2716 *
2717 * There are no conflicts at this point, neither is there any need for
2718 * page table allocations.
2719 *
2720 * @returns VBox status code.
2721 * @param pVCpu The cross context virtual CPU structure.
2722 * @param GCPhysNestedPage The nested-guest physical address of the page being
2723 * synced.
2724 * @param GCPhysPage The guest-physical address of the page being synced.
2725 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
2726 * @param uErr The page fault error (X86_TRAP_PF_XXX).
2727 * @param pGstWalkAll The guest page table walk result.
2728 */
2729static int PGM_BTH_NAME(NestedSyncPage)(PVMCPUCC pVCpu, RTGCPHYS GCPhysNestedPage, RTGCPHYS GCPhysPage, unsigned cPages,
2730 uint32_t uErr, PPGMPTWALKGST pGstWalkAll)
2731{
2732 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
2733 Assert(!(GCPhysNestedPage & GUEST_PAGE_OFFSET_MASK));
2734 Assert(!(GCPhysPage & GUEST_PAGE_OFFSET_MASK));
2735
2736 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2737 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2738    Log7Func(("GCPhysNestedPage=%RGp GCPhysPage=%RGp cPages=%u uErr=%#x\n", GCPhysNestedPage, GCPhysPage, cPages, uErr));
2739 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages);
2740
2741 PGM_LOCK_ASSERT_OWNER(pVM);
2742
2743 /*
2744 * Get the shadow PDE, find the shadow page table in the pool.
2745 */
2746 unsigned const iPde = ((GCPhysNestedPage >> EPT_PD_SHIFT) & EPT_PD_MASK);
2747 PEPTPD pPd;
2748 int rc = pgmShwGetNestedEPTPDPtr(pVCpu, GCPhysNestedPage, NULL, &pPd, pGstWalkAll);
2749 if (RT_SUCCESS(rc))
2750 { /* likely */ }
2751 else
2752 {
2753 Log(("Failed to fetch EPT PD for %RGp (%RGp) rc=%Rrc\n", GCPhysNestedPage, GCPhysPage, rc));
2754 return rc;
2755 }
2756 Assert(pPd);
2757 EPTPDE Pde = pPd->a[iPde];
2758
2759 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2760 if (!SHW_PDE_IS_P(Pde))
2761 {
2762 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)Pde.u));
2763 Log7Func(("CPU%d: SyncPage: Pde at %RGp changed behind our back!\n", pVCpu->idCpu, GCPhysNestedPage));
2764 return VINF_SUCCESS; /* force the instruction to be executed again. */
2765 }
2766
2767    /* Can happen in the guest SMP case; another VCPU activated this PDE while we were blocked handling the page fault. */
2768 if (SHW_PDE_IS_BIG(Pde))
2769 {
2770 Log7Func(("CPU%d: SyncPage: %RGp changed behind our back!\n", pVCpu->idCpu, GCPhysNestedPage));
2771 return VINF_SUCCESS;
2772 }
2773
2774 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, Pde.u & EPT_PDE_PG_MASK);
2775 PEPTPT pPt = (PEPTPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2776
2777 /*
2778 * If we've shadowed a guest EPT PDE that maps a 2M page using a 4K table,
2779 * then sync the 4K sub-page in the 2M range.
2780 */
2781 if (pGstWalkAll->u.Ept.Pde.u & EPT_E_LEAF)
2782 {
2783 Assert(!SHW_PDE_IS_BIG(Pde));
2784
2785 Assert(pGstWalkAll->u.Ept.Pte.u == 0);
2786 Assert((Pde.u & EPT_PRESENT_MASK) == (pGstWalkAll->u.Ept.Pde.u & EPT_PRESENT_MASK));
2787 Assert(pShwPage->GCPhys == (pGstWalkAll->u.Ept.Pde.u & EPT_PDE2M_PG_MASK));
2788
2789#if defined(VBOX_STRICT) && defined(DEBUG_ramshankar)
2790 PPGMPAGE pPage;
2791 rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage); AssertRC(rc);
2792 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE);
2793 Assert(pShwPage->enmKind == PGMPOOLKIND_EPT_PT_FOR_EPT_2MB);
2794#endif
2795 uint64_t const fGstShwPteFlags = (pGstWalkAll->u.Ept.Pde.u & pVCpu->pgm.s.fGstEptShadowedBigPdeMask & ~EPT_E_LEAF)
2796 | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
2797 SLATPTE GstSlatPte;
2798 GstSlatPte.u = GCPhysPage | fGstShwPteFlags;
2799
2800 unsigned const iPte = (GCPhysNestedPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2801 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPte], GCPhysPage, pShwPage, iPte, GstSlatPte);
2802 Log7Func(("4K: GCPhysPage=%RGp iPte=%u ShwPte=%08llx\n", GCPhysPage, iPte, SHW_PTE_LOG64(pPt->a[iPte])));
2803 return VINF_SUCCESS;
2804 }
2805
2806 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2807# ifdef PGM_SYNC_N_PAGES
2808 if ( cPages > 1
2809 && !(uErr & X86_TRAP_PF_P)
2810 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2811 {
2812 /*
2813 * This code path is currently only taken for non-present pages!
2814 *
2815         * We sync up to PGM_SYNC_NR_PAGES pages around the faulting page to
2816         * exploit locality.
2817 */
2818 unsigned iPte = (GCPhysNestedPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2819 unsigned const iPteEnd = RT_MIN(iPte + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPt->a));
2820 if (iPte < PGM_SYNC_NR_PAGES / 2)
2821 iPte = 0;
2822 else
2823 iPte -= PGM_SYNC_NR_PAGES / 2;
2824 for (; iPte < iPteEnd; iPte++)
2825 {
2826 if (!SHW_PTE_IS_P(pPt->a[iPte]))
2827 {
2828 PGMPTWALKGST GstWalkPt;
2829 PGMPTWALK WalkPt;
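                    /* Derive the nested-guest physical address of the iPte'th page covered by this shadow page table. */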
2830 GCPhysNestedPage &= ~(SHW_PT_MASK << SHW_PT_SHIFT);
2831 GCPhysNestedPage |= (iPte << GUEST_PAGE_SHIFT);
2832 rc = pgmGstSlatWalk(pVCpu, GCPhysNestedPage, false /*fIsLinearAddrValid*/, 0 /*GCPtrNested*/, &WalkPt,
2833 &GstWalkPt);
2834 if (RT_SUCCESS(rc))
2835 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPte], WalkPt.GCPhys, pShwPage, iPte, GstWalkPt.u.Ept.Pte);
2836 else
2837 {
2838 /*
2839                     * These could be MMIO pages reserved by the nested-hypervisor or genuinely not-present pages.
2840                     * Ensure the shadow table entry is not-present.
2841 */
2842 /** @todo Potential room for optimization (explained in NestedSyncPT). */
2843 AssertMsg(!pPt->a[iPte].u, ("%RX64\n", pPt->a[iPte].u));
2844 }
2845 Log7Func(("Many: %RGp iPte=%u ShwPte=%RX64\n", GCPhysNestedPage, iPte, SHW_PTE_LOG64(pPt->a[iPte])));
2846 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2847 break;
2848 }
2849 else
2850 {
2851# ifdef VBOX_STRICT
2852                /* Paranoia - Verify that the address of the page is what it should be. */
2853 PGMPTWALKGST GstWalkPt;
2854 PGMPTWALK WalkPt;
2855 GCPhysNestedPage &= ~(SHW_PT_MASK << SHW_PT_SHIFT);
2856 GCPhysNestedPage |= (iPte << GUEST_PAGE_SHIFT);
2857 rc = pgmGstSlatWalk(pVCpu, GCPhysNestedPage, false /*fIsLinearAddrValid*/, 0 /*GCPtrNested*/, &WalkPt, &GstWalkPt);
2858 AssertRC(rc);
2859 PPGMPAGE pPage;
2860 rc = pgmPhysGetPageEx(pVM, WalkPt.GCPhys, &pPage);
2861 AssertRC(rc);
2862 AssertMsg(PGM_PAGE_GET_HCPHYS(pPage) == SHW_PTE_GET_HCPHYS(pPt->a[iPte]),
2863 ("PGM page and shadow PTE address conflict. GCPhysNestedPage=%RGp GCPhysPage=%RGp HCPhys=%RHp Shw=%RHp\n",
2864 GCPhysNestedPage, WalkPt.GCPhys, PGM_PAGE_GET_HCPHYS(pPage), SHW_PTE_GET_HCPHYS(pPt->a[iPte])));
2865# endif
2866 Log7Func(("Many3: %RGp iPte=%u ShwPte=%RX64\n", GCPhysNestedPage, iPte, SHW_PTE_LOG64(pPt->a[iPte])));
2867 }
2868 }
2869 }
2870 else
2871# endif /* PGM_SYNC_N_PAGES */
2872 {
2873 unsigned const iPte = (GCPhysNestedPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2874 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPte], GCPhysPage, pShwPage, iPte, pGstWalkAll->u.Ept.Pte);
2875 Log7Func(("4K: GCPhysPage=%RGp iPte=%u ShwPte=%08llx\n", GCPhysPage, iPte, SHW_PTE_LOG64(pPt->a[iPte])));
2876 }
2877
2878 return VINF_SUCCESS;
2879}
2880
2881
2882/**
2883 * Sync a shadow page table for a nested-guest page table.
2884 *
2885 * The shadow page table is not present in the shadow PDE.
2886 *
2887 * Handles mapping conflicts.
2888 *
2889 * A precondition for this method is that the shadow PDE is not present. The
2890 * caller must take the PGM lock before checking this and continue to hold it
2891 * when calling this method.
2892 *
2893 * @returns VBox status code.
2894 * @param pVCpu The cross context virtual CPU structure.
2895 * @param GCPhysNestedPage The nested-guest physical page address of the page
2896 * being synced.
2897 * @param GCPhysPage The guest-physical address of the page being synced.
2898 * @param pGstWalkAll The guest page table walk result.
2899 */
2900static int PGM_BTH_NAME(NestedSyncPT)(PVMCPUCC pVCpu, RTGCPHYS GCPhysNestedPage, RTGCPHYS GCPhysPage, PPGMPTWALKGST pGstWalkAll)
2901{
2902 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
2903 Assert(!(GCPhysNestedPage & GUEST_PAGE_OFFSET_MASK));
2904 Assert(!(GCPhysPage & GUEST_PAGE_OFFSET_MASK));
2905
2906 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2907 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2908
2909 Log7Func(("GCPhysNestedPage=%RGp GCPhysPage=%RGp\n", GCPhysNestedPage, GCPhysPage));
2910
2911 PGM_LOCK_ASSERT_OWNER(pVM);
2912 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2913
2914 PEPTPD pPd;
2915 PEPTPDPT pPdpt;
2916 unsigned const iPde = (GCPhysNestedPage >> EPT_PD_SHIFT) & EPT_PD_MASK;
2917 int rc = pgmShwGetNestedEPTPDPtr(pVCpu, GCPhysNestedPage, &pPdpt, &pPd, pGstWalkAll);
2918 if (RT_SUCCESS(rc))
2919 { /* likely */ }
2920 else
2921 {
2922 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2923 AssertRC(rc);
2924 return rc;
2925 }
2926 Assert(pPd);
2927 PSHWPDE pPde = &pPd->a[iPde];
2928
2929 unsigned const iPdpt = (GCPhysNestedPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2930 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdpt->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2931 Assert(pShwPde->enmKind == PGMPOOLKIND_EPT_PD_FOR_EPT_PD);
2932
2933 SHWPDE Pde = *pPde;
2934 Assert(!SHW_PDE_IS_P(Pde)); /* We're only supposed to call SyncPT on PDE!P and conflicts. */
2935
2936# ifdef PGM_WITH_LARGE_PAGES
2937 Assert(BTH_IS_NP_ACTIVE(pVM));
2938
2939 /*
2940 * Check if the guest is mapping a 2M page.
2941 */
2942 if (pGstWalkAll->u.Ept.Pde.u & EPT_E_LEAF)
2943 {
2944 PPGMPAGE pPage;
2945 rc = pgmPhysGetPageEx(pVM, GCPhysPage & X86_PDE2M_PAE_PG_MASK, &pPage);
2946 AssertRCReturn(rc, rc);
2947
2948 /* A20 is always enabled in VMX root and non-root operation. */
2949 Assert(PGM_A20_IS_ENABLED(pVCpu));
2950
2951 /*
2952 * Check if we have or can get a 2M backing page here.
2953 */
2954 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2955 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
2956 {
2957 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
2958 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2959 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2960 }
2961 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
2962 {
2963 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
2964 rc = pgmPhysRecheckLargePage(pVM, GCPhysPage, pPage);
2965 if (RT_SUCCESS(rc))
2966 {
2967 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2968 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2969 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2970 }
2971 }
2972 else if (PGMIsUsingLargePages(pVM))
2973 {
2974 rc = pgmPhysAllocLargePage(pVM, GCPhysPage);
2975 if (RT_SUCCESS(rc))
2976 {
2977 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
2978 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
2979 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2980 }
2981 }
2982
2983 /*
2984 * If we have a 2M backing page, we can map the guest's 2M page right away.
2985 */
2986 uint64_t const fGstShwBigPdeFlags = (pGstWalkAll->u.Ept.Pde.u & pVCpu->pgm.s.fGstEptShadowedBigPdeMask)
2987 | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
2988 if (HCPhys != NIL_RTHCPHYS)
2989 {
2990 Pde.u = HCPhys | fGstShwBigPdeFlags;
2991 Assert(!(Pde.u & pVCpu->pgm.s.fGstEptMbzBigPdeMask));
2992 Assert(Pde.u & EPT_E_LEAF);
2993 SHW_PDE_ATOMIC_SET2(*pPde, Pde);
2994
2995 /* Add a reference to the first page only. */
2996 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPde);
2997
2998 Assert(PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED);
2999
3000 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3001 Log7Func(("GstPde=%RGp ShwPde=%RX64 [2M]\n", pGstWalkAll->u.Ept.Pde.u, Pde.u));
3002 return VINF_SUCCESS;
3003 }
3004
3005 /*
3006 * We didn't get a perfect 2M fit. Split the 2M page into 4K pages.
3007 * The page ought not to be marked as a big (2M) page at this point.
3008 */
3009 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE);
3010
3011 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
3012 PGMPOOLACCESS enmAccess;
3013 {
3014 /*
3015 * Mode-based execute control for EPT not supported.
3016 *
3017 * However, Windows 10 with Hyper-V enabled sets the EPT_E_USER_EXECUTE bit but does
3018 * not enable "mode-based execute control for EPT" in the VT-x secondary VM-execution
3019 * controls. The CPU ignores this bit when the control isn't set. Hence, the assertion
3020 * below is commented out.
3021 */
3022 /* Assert(!(pGstWalkAll->u.Ept.Pde.u & EPT_E_USER_EXECUTE)); */
3023 Assert(!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fVmxModeBasedExecuteEpt);
3024 bool const fNoExecute = !(pGstWalkAll->u.Ept.Pde.u & EPT_E_EXECUTE);
3025 if (pGstWalkAll->u.Ept.Pde.u & EPT_E_WRITE)
3026 enmAccess = fNoExecute ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
3027 else
3028 enmAccess = fNoExecute ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
3029 }
3030
3031 /*
3032 * Allocate & map a 4K shadow table to cover the 2M guest page.
3033 */
3034 PPGMPOOLPAGE pShwPage;
3035 RTGCPHYS const GCPhysPt = pGstWalkAll->u.Ept.Pde.u & EPT_PDE2M_PG_MASK;
3036 rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_EPT_PT_FOR_EPT_2MB, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
3037 pShwPde->idx, iPde, false /*fLockPage*/, &pShwPage);
3038 if ( rc == VINF_SUCCESS
3039 || rc == VINF_PGM_CACHED_PAGE)
3040 { /* likely */ }
3041 else
3042 {
3043 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3044 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3045 }
3046
3047 PSHWPT pPt = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3048 Assert(pPt);
3049 Assert(PGMPOOL_PAGE_IS_NESTED(pShwPage));
3050 if (rc == VINF_SUCCESS)
3051 {
3052 /* The 4K PTEs shall inherit the flags of the 2M PDE page sans the leaf bit. */
3053 uint64_t const fGstShwPteFlags = fGstShwBigPdeFlags & ~EPT_E_LEAF;
3054
3055        /* Sync each 4K page in the 2M range. */
3056 for (unsigned iPte = 0; iPte < RT_ELEMENTS(pPt->a); iPte++)
3057 {
3058 RTGCPHYS const GCPhysSubPage = GCPhysPt | (iPte << GUEST_PAGE_SHIFT);
3059 SLATPTE GstSlatPte;
3060 GstSlatPte.u = GCPhysSubPage | fGstShwPteFlags;
3061 Assert(!(GstSlatPte.u & pVCpu->pgm.s.fGstEptMbzPteMask));
3062 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPte], GCPhysSubPage, pShwPage, iPte, GstSlatPte);
3063 Log7Func(("GstPte=%RGp ShwPte=%RX64 iPte=%u [2M->4K]\n", pGstWalkAll->u.Ept.Pte, pPt->a[iPte].u, iPte));
3064 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3065 break;
3066 }
3067 }
3068 else
3069 {
3070 Assert(rc == VINF_PGM_CACHED_PAGE);
3071# if defined(VBOX_STRICT) && defined(DEBUG_ramshankar)
3072        /* Paranoia - Verify that the address of each of the subpages is what it should be. */
3073 RTGCPHYS GCPhysSubPage = GCPhysPt;
3074 for (unsigned iPte = 0; iPte < RT_ELEMENTS(pPt->a); iPte++, GCPhysSubPage += GUEST_PAGE_SIZE)
3075 {
3076 PPGMPAGE pSubPage;
3077 rc = pgmPhysGetPageEx(pVM, GCPhysSubPage, &pSubPage);
3078 AssertRC(rc);
3079 AssertMsg( PGM_PAGE_GET_HCPHYS(pSubPage) == SHW_PTE_GET_HCPHYS(pPt->a[iPte])
3080 || !SHW_PTE_IS_P(pPt->a[iPte]),
3081 ("PGM 2M page and shadow PTE conflict. GCPhysSubPage=%RGp Page=%RHp Shw=%RHp\n",
3082 GCPhysSubPage, PGM_PAGE_GET_HCPHYS(pSubPage), SHW_PTE_GET_HCPHYS(pPt->a[iPte])));
3083 }
3084# endif
3085 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3086 }
3087
3088 /* Save the new PDE. */
3089 uint64_t const fShwPdeFlags = pGstWalkAll->u.Ept.Pde.u & pVCpu->pgm.s.fGstEptShadowedPdeMask;
3090 Pde.u = pShwPage->Core.Key | fShwPdeFlags;
3091 Assert(!(Pde.u & EPT_E_LEAF));
3092 Assert(!(Pde.u & pVCpu->pgm.s.fGstEptMbzPdeMask));
3093 SHW_PDE_ATOMIC_SET2(*pPde, Pde);
3094 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3095 Log7Func(("GstPde=%RGp ShwPde=%RX64 iPde=%u\n", pGstWalkAll->u.Ept.Pde.u, pPde->u, iPde));
3096 return rc;
3097 }
3098# endif /* PGM_WITH_LARGE_PAGES */
3099
3100 /*
3101 * Allocate & map the shadow page table.
3102 */
3103 PSHWPT pPt;
3104 PPGMPOOLPAGE pShwPage;
3105
3106 RTGCPHYS const GCPhysPt = pGstWalkAll->u.Ept.Pde.u & EPT_PDE_PG_MASK;
3107 rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_EPT_PT_FOR_EPT_PT, PGMPOOLACCESS_DONTCARE,
3108 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPde, false /*fLockPage*/, &pShwPage);
3109 if ( rc == VINF_SUCCESS
3110 || rc == VINF_PGM_CACHED_PAGE)
3111 { /* likely */ }
3112 else
3113 {
3114 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3115 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3116 }
3117
3118 pPt = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3119 Assert(pPt);
3120 Assert(PGMPOOL_PAGE_IS_NESTED(pShwPage));
3121
3122 if (rc == VINF_SUCCESS)
3123 {
3124 /* Sync the page we've already translated through SLAT. */
3125 const unsigned iPte = (GCPhysNestedPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
3126 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPte], GCPhysPage, pShwPage, iPte, pGstWalkAll->u.Ept.Pte);
3127 Log7Func(("GstPte=%RGp ShwPte=%RX64 iPte=%u\n", pGstWalkAll->u.Ept.Pte.u, pPt->a[iPte].u, iPte));
3128
3129        /* Sync the rest of the page table (expensive but might be cheaper than nested-guest VM-exits in hardware). */
3130 for (unsigned iPteCur = 0; iPteCur < RT_ELEMENTS(pPt->a); iPteCur++)
3131 {
3132 if (iPteCur != iPte)
3133 {
3134 PGMPTWALKGST GstWalkPt;
3135 PGMPTWALK WalkPt;
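                /* Derive the nested-guest physical address of the iPteCur'th page covered by this shadow page table. */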
3136 GCPhysNestedPage &= ~(SHW_PT_MASK << SHW_PT_SHIFT);
3137 GCPhysNestedPage |= (iPteCur << GUEST_PAGE_SHIFT);
3138 int const rc2 = pgmGstSlatWalk(pVCpu, GCPhysNestedPage, false /*fIsLinearAddrValid*/, 0 /*GCPtrNested*/,
3139 &WalkPt, &GstWalkPt);
3140 if (RT_SUCCESS(rc2))
3141 {
3142 PGM_BTH_NAME(NestedSyncPageWorker)(pVCpu, &pPt->a[iPteCur], WalkPt.GCPhys, pShwPage, iPteCur,
3143 GstWalkPt.u.Ept.Pte);
3144 Log7Func(("GstPte=%RGp ShwPte=%RX64 iPte=%u\n", GstWalkPt.u.Ept.Pte.u, pPt->a[iPteCur].u, iPteCur));
3145 }
3146 else
3147 {
3148 /*
3149                 * These could be MMIO pages reserved by the nested-hypervisor or genuinely not-present pages.
3150                 * Ensure the shadow table entry is not-present.
3151 */
3152 /** @todo We currently don't configure these to cause EPT misconfigs but rather trap
3153 * them using EPT violations and walk the guest EPT tables to determine
3154                 *        whether they are EPT misconfig VM-exits for the nested-hypervisor. We
3155 * could optimize this by using a specific combination of reserved bits
3156 * which we could immediately identify as EPT misconfigs of the
3157 * nested-hypervisor without having to walk its EPT tables. However, tracking
3158 * non-present entries might be tricky...
3159 */
3160 AssertMsg(!pPt->a[iPteCur].u, ("%RX64\n", pPt->a[iPteCur].u));
3161 }
3162 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3163 break;
3164 }
3165 }
3166 }
3167 else
3168 {
3169 Assert(rc == VINF_PGM_CACHED_PAGE);
3170# if defined(VBOX_STRICT) && defined(DEBUG_ramshankar)
3171 /* Paranoia - Verify address of the page is what it should be. */
3172 PPGMPAGE pPage;
3173 rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
3174 AssertRC(rc);
3175 const unsigned iPte = (GCPhysNestedPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
3176 AssertMsg(PGM_PAGE_GET_HCPHYS(pPage) == SHW_PTE_GET_HCPHYS(pPt->a[iPte]) || !SHW_PTE_IS_P(pPt->a[iPte]),
3177 ("PGM page and shadow PTE address conflict. GCPhysNestedPage=%RGp GCPhysPage=%RGp Page=%RHp Shw=%RHp\n",
3178 GCPhysNestedPage, GCPhysPage, PGM_PAGE_GET_HCPHYS(pPage), SHW_PTE_GET_HCPHYS(pPt->a[iPte])));
3179 Log7Func(("GstPte=%RGp ShwPte=%RX64 iPte=%u [cache]\n", pGstWalkAll->u.Ept.Pte.u, pPt->a[iPte].u, iPte));
3180# endif
3181 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3182 }
3183
3184 /* Save the new PDE. */
3185 uint64_t const fShwPdeFlags = pGstWalkAll->u.Ept.Pde.u & pVCpu->pgm.s.fGstEptShadowedPdeMask;
3186 Assert(!(pGstWalkAll->u.Ept.Pde.u & EPT_E_LEAF));
3187 Assert(!(pGstWalkAll->u.Ept.Pde.u & pVCpu->pgm.s.fGstEptMbzPdeMask));
3188 Pde.u = pShwPage->Core.Key | fShwPdeFlags;
3189 SHW_PDE_ATOMIC_SET2(*pPde, Pde);
3190 Log7Func(("GstPde=%RGp ShwPde=%RX64 iPde=%u\n", pGstWalkAll->u.Ept.Pde.u, pPde->u, iPde));
3191
3192 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3193 return rc;
3194}
3195
3196 #endif /* !IN_RING3 && VBOX_WITH_NESTED_HWVIRT_VMX_EPT && PGM_SHW_TYPE == PGM_TYPE_EPT */
3197#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3198
3199/**
3200 * Handle dirty bit tracking faults.
3201 *
3202 * @returns VBox status code.
3203 * @param pVCpu The cross context virtual CPU structure.
3204 * @param uErr Page fault error code.
3205 * @param pPdeSrc Guest page directory entry.
3206 * @param pPdeDst Shadow page directory entry.
3207 * @param GCPtrPage Guest context page address.
3208 */
3209static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
3210 RTGCPTR GCPtrPage)
3211{
3212 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3213 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3214 NOREF(uErr);
3215
3216 PGM_LOCK_ASSERT_OWNER(pVM);
3217
3218 /*
3219 * Handle big page.
3220 */
3221 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
3222 {
3223 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3224 {
3225 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
3226 Assert(pPdeSrc->u & X86_PDE_RW);
3227
3228 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
3229 * fault again and take this path to only invalidate the entry (see below). */
3230 SHWPDE PdeDst = *pPdeDst;
3231 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
3232 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
3233 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3234 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
3235 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
3236 }
3237
3238# ifdef IN_RING0
3239 /* Check for stale TLB entry; only applies to the SMP guest case. */
3240 if ( pVM->cCpus > 1
3241 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
3242 {
3243 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
3244 if (pShwPage)
3245 {
3246 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3247 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
3248 if (SHW_PTE_IS_P_RW(*pPteDst))
3249 {
3250 /* Stale TLB entry. */
3251 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
3252 PGM_INVL_PG(pVCpu, GCPtrPage);
3253 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
3254 }
3255 }
3256 }
3257# endif /* IN_RING0 */
3258 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
3259 }
3260
3261 /*
3262 * Map the guest page table.
3263 */
3264 PGSTPT pPTSrc;
3265 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
3266 AssertRCReturn(rc, rc);
3267
3268 if (SHW_PDE_IS_P(*pPdeDst))
3269 {
3270 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
3271 const GSTPTE PteSrc = *pPteSrc;
3272
3273 /*
3274 * Map shadow page table.
3275 */
3276 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
3277 if (pShwPage)
3278 {
3279 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3280 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
3281 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
3282 {
3283 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
3284 {
3285 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
3286 SHWPTE PteDst = *pPteDst;
3287
3288 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
3289 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
3290
3291 Assert(PteSrc.u & X86_PTE_RW);
3292
3293 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
3294 * entry will not harm; write access will simply fault again and
3295 * take this path to only invalidate the entry.
3296 */
3297 if (RT_LIKELY(pPage))
3298 {
3299 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
3300 {
3301 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
3302 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
3303 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
3304 SHW_PTE_SET_RO(PteDst);
3305 }
3306 else
3307 {
3308 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
3309 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
3310 {
3311 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
3312 AssertRC(rc);
3313 }
3314 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
3315 SHW_PTE_SET_RW(PteDst);
3316 else
3317 {
3318 /* Still applies to shared pages. */
3319 Assert(!PGM_PAGE_IS_ZERO(pPage));
3320 SHW_PTE_SET_RO(PteDst);
3321 }
3322 }
3323 }
3324 else
3325 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
3326
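                    /* Set the accessed and dirty bits, clear the dirty-tracking flag and commit the shadow PTE. */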
3327 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
3328 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
3329 PGM_INVL_PG(pVCpu, GCPtrPage);
3330 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
3331 }
3332
3333# ifdef IN_RING0
3334 /* Check for stale TLB entry; only applies to the SMP guest case. */
3335 if ( pVM->cCpus > 1
3336 && SHW_PTE_IS_RW(*pPteDst)
3337 && SHW_PTE_IS_A(*pPteDst))
3338 {
3339 /* Stale TLB entry. */
3340 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
3341 PGM_INVL_PG(pVCpu, GCPtrPage);
3342 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
3343 }
3344# endif
3345 }
3346 }
3347 else
3348 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
3349 }
3350
3351 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
3352}
3353
3354#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
3355
3356/**
3357 * Sync a shadow page table.
3358 *
3359 * The shadow page table is not present in the shadow PDE.
3360 *
3361 * Handles mapping conflicts.
3362 *
3363 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
3364 * conflict), and Trap0eHandler.
3365 *
3366 * A precondition for this method is that the shadow PDE is not present. The
3367 * caller must take the PGM lock before checking this and continue to hold it
3368 * when calling this method.
3369 *
3370 * @returns VBox status code.
3371 * @param pVCpu The cross context virtual CPU structure.
3372 * @param iPDSrc Page directory index.
3373 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
3374 * Assume this is a temporary mapping.
3375  * @param   GCPtrPage   GC pointer of the page that caused the fault.
3376 */
3377static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
3378{
3379 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3380 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3381
3382#if 0 /* rarely useful; leave for debugging. */
3383 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
3384#endif
3385 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
3386
3387 PGM_LOCK_ASSERT_OWNER(pVM);
3388
3389#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3390 || PGM_GST_TYPE == PGM_TYPE_PAE \
3391 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3392 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3393 && PGM_SHW_TYPE != PGM_TYPE_NONE
3394 int rc = VINF_SUCCESS;
3395
3396 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3397
3398 /*
3399 * Some input validation first.
3400 */
3401 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
3402
3403 /*
3404 * Get the relevant shadow PDE entry.
3405 */
3406# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3407 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
3408 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3409 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR_3);
3410
3411 /* Fetch the pgm pool shadow descriptor. */
3412 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3413 Assert(pShwPde);
3414
3415# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3416 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3417 PPGMPOOLPAGE pShwPde = NULL;
3418 PX86PDPAE pPDDst;
3419 PSHWPDE pPdeDst;
3420
3421 /* Fetch the pgm pool shadow descriptor. */
3422 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3423 AssertRCSuccessReturn(rc, rc);
3424 Assert(pShwPde);
3425
3426 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3427 pPdeDst = &pPDDst->a[iPDDst];
3428
3429# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3430 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3431 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3432 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3433 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
3434 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3435 AssertRCSuccessReturn(rc, rc);
3436 Assert(pPDDst);
3437 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3438
3439# endif
3440 SHWPDE PdeDst = *pPdeDst;
3441
3442# if PGM_GST_TYPE == PGM_TYPE_AMD64
3443 /* Fetch the pgm pool shadow descriptor. */
3444 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3445 Assert(pShwPde);
3446# endif
3447
3448    Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P. */
3449
3450 /*
3451 * Sync the page directory entry.
3452 */
3453 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3454 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
3455 if ( (PdeSrc.u & X86_PDE_P)
3456 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
3457 {
3458 /*
3459 * Allocate & map the page table.
3460 */
3461 PSHWPT pPTDst;
3462 PPGMPOOLPAGE pShwPage;
3463 RTGCPHYS GCPhys;
3464 if (fPageTable)
3465 {
3466 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
3467# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3468 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3469 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
3470# endif
3471 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
3472 pShwPde->idx, iPDDst, false /*fLockPage*/,
3473 &pShwPage);
3474 }
3475 else
3476 {
3477 PGMPOOLACCESS enmAccess;
3478# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
3479 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
3480# else
3481 const bool fNoExecute = false;
3482# endif
3483
3484 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3485# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3486 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
3487 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
3488# endif
3489 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
3490 if (PdeSrc.u & X86_PDE_US)
3491 {
3492 if (PdeSrc.u & X86_PDE_RW)
3493 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
3494 else
3495 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
3496 }
3497 else
3498 {
3499 if (PdeSrc.u & X86_PDE_RW)
3500 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
3501 else
3502 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
3503 }
3504 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
3505 pShwPde->idx, iPDDst, false /*fLockPage*/,
3506 &pShwPage);
3507 }
3508 if (rc == VINF_SUCCESS)
3509 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3510 else if (rc == VINF_PGM_CACHED_PAGE)
3511 {
3512 /*
3513 * The PT was cached, just hook it up.
3514 */
3515 if (fPageTable)
3516 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
3517 else
3518 {
3519 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
3520 /* (see explanation and assumptions further down.) */
3521 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
3522 {
3523 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
3524 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
3525 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
3526 }
3527 }
3528 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3529 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3530 return VINF_SUCCESS;
3531 }
3532 else
3533 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3534 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
3535 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
3536 * irrelevant at this point. */
3537 PdeDst.u &= X86_PDE_AVL_MASK;
3538 PdeDst.u |= pShwPage->Core.Key;
3539
3540 /*
3541 * Page directory has been accessed (this is a fault situation, remember).
3542 */
3543 /** @todo
3544     * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
3545 * fault situation. What's more, the Trap0eHandler has already set the
3546 * accessed bit. So, it's actually just VerifyAccessSyncPage which
3547 * might need setting the accessed flag.
3548 *
3549 * The best idea is to leave this change to the caller and add an
3550 * assertion that it's set already. */
3551 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
3552 if (fPageTable)
3553 {
3554 /*
3555 * Page table - 4KB.
3556 *
3557 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
3558 */
3559 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
3560 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
3561 PGSTPT pPTSrc;
3562 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
3563 if (RT_SUCCESS(rc))
3564 {
3565 /*
3566 * Start by syncing the page directory entry so CSAM's TLB trick works.
3567 */
3568 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
3569 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
3570 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3571 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3572
3573 /*
3574 * Directory/page user or supervisor privilege: (same goes for read/write)
3575 *
3576 * Directory Page Combined
3577 * U/S U/S U/S
3578 * 0 0 0
3579 * 0 1 0
3580 * 1 0 0
3581 * 1 1 1
3582 *
3583 * Simple AND operation. Table listed for completeness.
3584 *
3585 */
3586 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
3587# ifdef PGM_SYNC_N_PAGES
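            /* Only sync a window of up to PGM_SYNC_NR_PAGES entries around the faulting page. */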
3588 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
3589 unsigned iPTDst = iPTBase;
3590 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
3591 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
3592 iPTDst = 0;
3593 else
3594 iPTDst -= PGM_SYNC_NR_PAGES / 2;
3595# else /* !PGM_SYNC_N_PAGES */
3596 unsigned iPTDst = 0;
3597 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
3598# endif /* !PGM_SYNC_N_PAGES */
3599 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
3600 | ((RTGCPTR)iPTDst << GUEST_PAGE_SHIFT);
3601# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3602 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3603 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
3604# else
3605 const unsigned offPTSrc = 0;
3606# endif
3607 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += GUEST_PAGE_SIZE)
3608 {
3609 const unsigned iPTSrc = iPTDst + offPTSrc;
3610 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
3611 if (PteSrc.u & X86_PTE_P)
3612 {
3613 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
3614 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
3615 GCPtrCur,
3616 PteSrc.u & X86_PTE_P,
3617 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
3618 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
3619 (uint64_t)PteSrc.u,
3620 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
3621 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
3622 }
3623 /* else: the page table was cleared by the pool */
3624 } /* for PTEs */
3625 }
3626 }
3627 else
3628 {
3629 /*
3630 * Big page - 2/4MB.
3631 *
3632 * We'll walk the ram range list in parallel and optimize lookups.
3633 * We will only sync one shadow page table at a time.
3634 */
3635 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
3636
3637 /**
3638 * @todo It might be more efficient to sync only a part of the 4MB
3639 * page (similar to what we do for 4KB PDs).
3640 */
3641
3642 /*
3643 * Start by syncing the page directory entry.
3644 */
3645 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
3646 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
3647
3648 /*
3649 * If the page is not flagged as dirty and is writable, then make it read-only
3650 * at PD level, so we can set the dirty bit when the page is modified.
3651 *
3652 * ASSUMES that page access handlers are implemented on page table entry level.
3653 * Thus we will first catch the dirty access and set PDE.D and restart. If
3654 * there is an access handler, we'll trap again and let it work on the problem.
3655 */
3656 /** @todo move the above stuff to a section in the PGM documentation. */
3657 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
3658 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
3659 {
3660 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
3661 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
3662 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
3663 }
3664 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3665 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3666
3667 /*
3668 * Fill the shadow page table.
3669 */
3670 /* Get address and flags from the source PDE. */
3671 SHWPTE PteDstBase;
3672 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
3673
3674 /* Loop thru the entries in the shadow PT. */
3675 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
3676 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
3677 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
3678 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3679 unsigned iPTDst = 0;
3680 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3681 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
3682 {
3683 PPGMRAMRANGE const pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
3684 if (pRam && GCPhys >= pRam->GCPhys)
3685 {
3686# ifndef PGM_WITH_A20
3687 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT;
3688# endif
3689 do
3690 {
3691 /* Make shadow PTE. */
3692# ifdef PGM_WITH_A20
3693 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
3694# else
3695 PPGMPAGE pPage = &pRam->aPages[iHCPage];
3696# endif
3697 SHWPTE PteDst;
3698
3699# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3700 /* Try to make the page writable if necessary. */
3701 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
3702 && ( PGM_PAGE_IS_ZERO(pPage)
3703 || ( SHW_PTE_IS_RW(PteDstBase)
3704 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
3705# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
3706 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
3707# endif
3708# ifdef VBOX_WITH_PAGE_SHARING
3709 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
3710# endif
3711 && !PGM_PAGE_IS_BALLOONED(pPage))
3712 )
3713 )
3714 {
3715 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
3716 AssertRCReturn(rc, rc);
3717 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
3718 break;
3719 }
3720# endif
3721
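                    /* Compose the shadow PTE: pages with active handlers get handler treatment, ballooned pages stay not-present, the rest map straight to the backing page. */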
3722 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage) && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPage))
3723 PGM_BTH_NAME(SyncHandlerPte)(pVM, pVCpu, pPage, GCPhys, SHW_PTE_GET_U(PteDstBase), &PteDst);
3724 else if (PGM_PAGE_IS_BALLOONED(pPage))
3725 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
3726 else
3727 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
3728
3729 /* Only map writable pages writable. */
3730 if ( SHW_PTE_IS_P_RW(PteDst)
3731 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
3732 {
3733# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3734 /* Still applies to shared pages. */
3735 Assert(!PGM_PAGE_IS_ZERO(pPage));
3736# endif
3737 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
3738 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
3739 }
3740
3741 if (SHW_PTE_IS_P(PteDst))
3742 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
3743
3744 /* commit it (not atomic, new table) */
3745 pPTDst->a[iPTDst] = PteDst;
3746 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
3747 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
3748 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
3749
3750 /* advance */
3751 GCPhys += GUEST_PAGE_SIZE;
3752 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
3753# ifndef PGM_WITH_A20
3754 iHCPage++;
3755# endif
3756 iPTDst++;
3757 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3758 && GCPhys <= pRam->GCPhysLast);
3759 }
3760 else if (pRam)
3761 {
3762 Log(("Invalid pages at %RGp\n", GCPhys));
3763 do
3764 {
3765 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
3766 GCPhys += GUEST_PAGE_SIZE;
3767 iPTDst++;
3768 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3769 && GCPhys < pRam->GCPhys);
3770                PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
3771 }
3772 else
3773 {
3774 Log(("Invalid pages at %RGp (2)\n", GCPhys));
3775 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3776 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
3777 }
3778 } /* while more PTEs */
3779 } /* 4KB / 4MB */
3780 }
3781 else
3782 AssertRelease(!SHW_PDE_IS_P(PdeDst));
3783
3784 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3785 if (RT_FAILURE(rc))
3786 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3787 return rc;
3788
3789#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3790 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
3791 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3792 && PGM_SHW_TYPE != PGM_TYPE_NONE
3793 NOREF(iPDSrc); NOREF(pPDSrc);
3794
3795 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3796
3797 /*
3798 * Validate input a little bit.
3799 */
3800 int rc = VINF_SUCCESS;
3801# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3802 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3803 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3804 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR_3);
3805
3806 /* Fetch the pgm pool shadow descriptor. */
3807 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3808 Assert(pShwPde);
3809
3810# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3811 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3812 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3813 PX86PDPAE pPDDst;
3814 PSHWPDE pPdeDst;
3815
3816 /* Fetch the pgm pool shadow descriptor. */
3817 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3818 AssertRCSuccessReturn(rc, rc);
3819 Assert(pShwPde);
3820
3821 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3822 pPdeDst = &pPDDst->a[iPDDst];
3823
3824# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3825 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3826 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3827 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3828    PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
3829 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3830 AssertRCSuccessReturn(rc, rc);
3831 Assert(pPDDst);
3832 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3833
3834 /* Fetch the pgm pool shadow descriptor. */
3835 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3836 Assert(pShwPde);
3837
3838# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3839 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3840 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3841 PEPTPD pPDDst;
3842 PEPTPDPT pPdptDst;
3843
3844 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3845 if (rc != VINF_SUCCESS)
3846 {
3847 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3848 AssertRC(rc);
3849 return rc;
3850 }
3851 Assert(pPDDst);
3852 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3853
3854 /* Fetch the pgm pool shadow descriptor. */
3855 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
3856 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3857 Assert(pShwPde);
3858# endif
3859 SHWPDE PdeDst = *pPdeDst;
3860
3861    Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts. */
3862
3863# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3864 if (BTH_IS_NP_ACTIVE(pVM))
3865 {
3866 Assert(!VM_IS_NEM_ENABLED(pVM));
3867
3868 /* Check if we allocated a big page before for this 2 MB range. */
3869 PPGMPAGE pPage;
3870 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
3871 if (RT_SUCCESS(rc))
3872 {
3873 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3874 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3875 {
3876 if (PGM_A20_IS_ENABLED(pVCpu))
3877 {
3878 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3879 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3880 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3881 }
3882 else
3883 {
3884 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3885 pVM->pgm.s.cLargePagesDisabled++;
3886 }
3887 }
3888 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
3889 && PGM_A20_IS_ENABLED(pVCpu))
3890 {
3891 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3892 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
3893 if (RT_SUCCESS(rc))
3894 {
3895 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3896 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3897 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3898 }
3899 }
3900 # if !defined(VBOX_WITH_NEW_LAZY_PAGE_ALLOC) && !defined(PGM_WITH_PAGE_ZEROING_DETECTION) /* This code is too aggressive! */
3901 else if ( PGMIsUsingLargePages(pVM)
3902 && PGM_A20_IS_ENABLED(pVCpu))
3903 {
3904 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3905 if (RT_SUCCESS(rc))
3906 {
3907 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3908 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3909 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3910 }
3911 else
3912 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3913 }
3914# endif
3915
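            /* If we ended up with a 2 MB backing page, map the whole range with a single large shadow PDE. */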
3916 if (HCPhys != NIL_RTHCPHYS)
3917 {
3918# if PGM_SHW_TYPE == PGM_TYPE_EPT
3919 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
3920 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
3921# else
3922 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
3923 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
3924# endif
3925 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3926
3927 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3928 /* Add a reference to the first page only. */
3929 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3930
3931 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3932 return VINF_SUCCESS;
3933 }
3934 }
3935 }
3936# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3937
3938 /*
3939 * Allocate & map the page table.
3940 */
3941 PSHWPT pPTDst;
3942 PPGMPOOLPAGE pShwPage;
3943 RTGCPHYS GCPhys;
3944
3945 /* Virtual address = physical address */
3946 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
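    /* The pool page is keyed on the guest-physical address of the whole region this PD entry covers, hence the alignment below. */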
3947 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
3948 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
3949 &pShwPage);
3950 if ( rc == VINF_SUCCESS
3951 || rc == VINF_PGM_CACHED_PAGE)
3952 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3953 else
3954 {
3955 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3956 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3957 }
3958
3959 if (rc == VINF_SUCCESS)
3960 {
3961 /* New page table; fully set it up. */
3962 Assert(pPTDst);
3963
3964 /* Mask away the page offset. */
3965 GCPtrPage &= ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK;
3966
3967 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3968 {
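            /* Compute the guest address of each entry; with no guest paging it equals the guest-physical address. */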
3969 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
3970 | (iPTDst << GUEST_PAGE_SHIFT));
3971
3972 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
3973 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
3974 GCPtrCurPage,
3975 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3976 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3977
3978 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3979 break;
3980 }
3981 }
3982 else
3983 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3984
3985 /* Save the new PDE. */
3986# if PGM_SHW_TYPE == PGM_TYPE_EPT
3987 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
3988 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
3989# else
3990 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
3991 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
3992# endif
3993 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3994
3995 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3996 if (RT_FAILURE(rc))
3997 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3998 return rc;
3999
4000#else
4001 NOREF(iPDSrc); NOREF(pPDSrc);
4002 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
4003 return VERR_PGM_NOT_USED_IN_MODE;
4004#endif
4005}
4006
4007
4008
4009/**
4010 * Prefetch a page/set of pages.
4011 *
4012 * Typically used to sync commonly used pages before entering raw mode
4013 * after a CR3 reload.
4014 *
4015 * @returns VBox status code.
4016 * @param pVCpu The cross context virtual CPU structure.
4017  * @param   GCPtrPage   Page to prefetch.
4018 */
4019PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
4020{
4021#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
4022 || PGM_GST_TYPE == PGM_TYPE_REAL \
4023 || PGM_GST_TYPE == PGM_TYPE_PROT \
4024 || PGM_GST_TYPE == PGM_TYPE_PAE \
4025 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
4026 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
4027 && PGM_SHW_TYPE != PGM_TYPE_NONE
4028 /*
4029     * Check that all Guest levels thru the PDE are present, getting the
4030     * PD and PDE in the process.
4031 */
4032 int rc = VINF_SUCCESS;
4033# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4034# if PGM_GST_TYPE == PGM_TYPE_32BIT
4035 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
4036 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
4037# elif PGM_GST_TYPE == PGM_TYPE_PAE
4038 unsigned iPDSrc;
4039 X86PDPE PdpeSrc;
4040 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
4041 if (!pPDSrc)
4042 return VINF_SUCCESS; /* not present */
4043# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4044 unsigned iPDSrc;
4045 PX86PML4E pPml4eSrc;
4046 X86PDPE PdpeSrc;
4047 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
4048 if (!pPDSrc)
4049 return VINF_SUCCESS; /* not present */
4050# endif
4051 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
4052# else
4053 PGSTPD pPDSrc = NULL;
4054 const unsigned iPDSrc = 0;
4055 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
4056# endif
4057
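    /* Only prefetch when the guest PDE is present and has been accessed. */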
4058 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
4059 {
4060 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4061 PGM_LOCK_VOID(pVM);
4062
4063# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4064 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
4065# elif PGM_SHW_TYPE == PGM_TYPE_PAE
4066 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
4067 PX86PDPAE pPDDst;
4068 X86PDEPAE PdeDst;
4069# if PGM_GST_TYPE != PGM_TYPE_PAE
4070 X86PDPE PdpeSrc;
4071
4072 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
4073 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
4074# endif
4075 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
4076 if (rc != VINF_SUCCESS)
4077 {
4078 PGM_UNLOCK(pVM);
4079 AssertRC(rc);
4080 return rc;
4081 }
4082 Assert(pPDDst);
4083 PdeDst = pPDDst->a[iPDDst];
4084
4085# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
4086 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
4087 PX86PDPAE pPDDst;
4088 X86PDEPAE PdeDst;
4089
4090# if PGM_GST_TYPE == PGM_TYPE_PROT
4091 /* AMD-V nested paging */
4092 X86PML4E Pml4eSrc;
4093 X86PDPE PdpeSrc;
4094 PX86PML4E pPml4eSrc = &Pml4eSrc;
4095
4096 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
4097 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
4098 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
4099# endif
4100
4101 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
4102 if (rc != VINF_SUCCESS)
4103 {
4104 PGM_UNLOCK(pVM);
4105 AssertRC(rc);
4106 return rc;
4107 }
4108 Assert(pPDDst);
4109 PdeDst = pPDDst->a[iPDDst];
4110# endif
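        /* If the shadow PDE isn't present yet, sync the whole page table; otherwise just sync this one page. */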
4111 if (!(PdeDst.u & X86_PDE_P))
4112 {
4113 /** @todo r=bird: This guy will set the A bit on the PDE,
4114 * probably harmless. */
4115 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
4116 }
4117 else
4118 {
4119 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
4120 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
4121 * makes no sense to prefetch more than one page.
4122 */
4123 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
4124 if (RT_SUCCESS(rc))
4125 rc = VINF_SUCCESS;
4126 }
4127 PGM_UNLOCK(pVM);
4128 }
4129 return rc;
4130
4131#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
4132 NOREF(pVCpu); NOREF(GCPtrPage);
4133 return VINF_SUCCESS; /* ignore */
4134#else
4135 AssertCompile(0);
4136#endif
4137}
4138
4139
4140
4141
4142/**
4143 * Syncs a page during a PGMVerifyAccess() call.
4144 *
4145 * @returns VBox status code (informational included).
4146 * @param pVCpu The cross context virtual CPU structure.
4147 * @param GCPtrPage The address of the page to sync.
4148 * @param fPage The effective guest page flags.
4149 * @param uErr The trap error code.
4150 * @remarks This will normally never be called on invalid guest page
4151 * translation entries.
4152 */
4153PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
4154{
4155 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4156
4157 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
4158 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
4159
4160 Assert(!pVM->pgm.s.fNestedPaging);
4161#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
4162 || PGM_GST_TYPE == PGM_TYPE_REAL \
4163 || PGM_GST_TYPE == PGM_TYPE_PROT \
4164 || PGM_GST_TYPE == PGM_TYPE_PAE \
4165 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
4166 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
4167 && PGM_SHW_TYPE != PGM_TYPE_NONE
4168
4169 /*
4170 * Get guest PD and index.
4171 */
4172 /** @todo Performance: We've done all this a jiffy ago in the
4173 * PGMGstGetPage call. */
4174# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4175# if PGM_GST_TYPE == PGM_TYPE_32BIT
4176 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
4177 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
4178
4179# elif PGM_GST_TYPE == PGM_TYPE_PAE
4180 unsigned iPDSrc = 0;
4181 X86PDPE PdpeSrc;
4182 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
4183 if (RT_UNLIKELY(!pPDSrc))
4184 {
4185 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
4186 return VINF_EM_RAW_GUEST_TRAP;
4187 }
4188
4189# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4190 unsigned iPDSrc = 0; /* shut up gcc */
4191 PX86PML4E pPml4eSrc = NULL; /* ditto */
4192 X86PDPE PdpeSrc;
4193 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
4194 if (RT_UNLIKELY(!pPDSrc))
4195 {
4196 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
4197 return VINF_EM_RAW_GUEST_TRAP;
4198 }
4199# endif
4200
4201# else /* !PGM_WITH_PAGING */
4202 PGSTPD pPDSrc = NULL;
4203 const unsigned iPDSrc = 0;
4204# endif /* !PGM_WITH_PAGING */
4205 int rc = VINF_SUCCESS;
4206
4207 PGM_LOCK_VOID(pVM);
4208
4209 /*
4210 * First check if the shadow pd is present.
4211 */
4212# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4213 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
4214 AssertReturn(pPdeDst, VERR_INTERNAL_ERROR_3);
4215
4216# elif PGM_SHW_TYPE == PGM_TYPE_PAE
4217 PX86PDEPAE pPdeDst;
4218 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
4219 PX86PDPAE pPDDst;
4220# if PGM_GST_TYPE != PGM_TYPE_PAE
4221 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
4222 X86PDPE PdpeSrc;
 4223 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE PDPTEs; setting the accessed bit causes invalid VT-x guest-state errors. */
4224# endif
4225 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
4226 if (rc != VINF_SUCCESS)
4227 {
4228 PGM_UNLOCK(pVM);
4229 AssertRC(rc);
4230 return rc;
4231 }
4232 Assert(pPDDst);
4233 pPdeDst = &pPDDst->a[iPDDst];
4234
4235# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
4236 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
4237 PX86PDPAE pPDDst;
4238 PX86PDEPAE pPdeDst;
4239
4240# if PGM_GST_TYPE == PGM_TYPE_PROT
4241 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
4242 X86PML4E Pml4eSrc;
4243 X86PDPE PdpeSrc;
4244 PX86PML4E pPml4eSrc = &Pml4eSrc;
4245 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
4246 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
4247# endif
4248
4249 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
4250 if (rc != VINF_SUCCESS)
4251 {
4252 PGM_UNLOCK(pVM);
4253 AssertRC(rc);
4254 return rc;
4255 }
4256 Assert(pPDDst);
4257 pPdeDst = &pPDDst->a[iPDDst];
4258# endif
4259
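 /* If the shadow PDE is not present yet, let SyncPT create and populate the shadow page table for this address first. */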
4260 if (!(pPdeDst->u & X86_PDE_P))
4261 {
4262 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
4263 if (rc != VINF_SUCCESS)
4264 {
4265 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
4266 PGM_UNLOCK(pVM);
4267 AssertRC(rc);
4268 return rc;
4269 }
4270 }
4271
4272# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4273 /* Check for dirty bit fault */
4274 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
4275 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
4276 Log(("PGMVerifyAccess: success (dirty)\n"));
4277 else
4278# endif
4279 {
4280# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4281 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
4282# else
4283 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
4284# endif
4285
4286 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
4287 if (uErr & X86_TRAP_PF_US)
4288 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
4289 else /* supervisor */
4290 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
4291
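 /* Sync just this single page from the guest page table into the shadow page table. */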
4292 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
4293 if (RT_SUCCESS(rc))
4294 {
4295 /* Page was successfully synced */
4296 Log2(("PGMVerifyAccess: success (sync)\n"));
4297 rc = VINF_SUCCESS;
4298 }
4299 else
4300 {
4301 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
4302 rc = VINF_EM_RAW_GUEST_TRAP;
4303 }
4304 }
4305 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
4306 PGM_UNLOCK(pVM);
4307 return rc;
4308
4309#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
4310
4311 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
4312 return VERR_PGM_NOT_USED_IN_MODE;
4313#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
4314}
4315
4316
4317/**
4318 * Syncs the paging hierarchy starting at CR3.
4319 *
4320 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
4321 * informational status codes.
4322 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
4323 * the VMM into guest context.
4324 * @param pVCpu The cross context virtual CPU structure.
4325 * @param cr0 Guest context CR0 register.
4326 * @param cr3 Guest context CR3 register. Not subjected to the A20
4327 * mask.
4328 * @param cr4 Guest context CR4 register.
 4329 * @param fGlobal Whether to include global page directories.
4330 */
4331PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
4332{
4333 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4334 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
4335
4336 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
4337
4338#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
4339# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
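 /* Reset the pool's optimized dirty-page tracking before the sync (only when that optimization is compiled in). */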
4340 PGM_LOCK_VOID(pVM);
4341 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4342 if (pPool->cDirtyPages)
4343 pgmPoolResetDirtyPages(pVM);
4344 PGM_UNLOCK(pVM);
4345# endif
4346#endif /* !NESTED && !EPT */
4347
4348#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
4349 /*
4350 * Nested / EPT / None - No work.
4351 */
4352 return VINF_SUCCESS;
4353
4354#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
4355 /*
4356 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
4357 * out the shadow parts when the guest modifies its tables.
4358 */
4359 return VINF_SUCCESS;
4360
4361#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
4362
4363 return VINF_SUCCESS;
4364#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
4365}
4366
4367
4368
4369
4370#ifdef VBOX_STRICT
4371
4372/**
4373 * Checks that the shadow page table is in sync with the guest one.
4374 *
4375 * @returns The number of errors.
4376 * @param pVCpu The cross context virtual CPU structure.
4377 * @param cr3 Guest context CR3 register.
4378 * @param cr4 Guest context CR4 register.
4379 * @param GCPtr Where to start. Defaults to 0.
4380 * @param cb How much to check. Defaults to everything.
4381 */
4382PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
4383{
4384 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
4385#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
4386 return 0;
4387#else
4388 unsigned cErrors = 0;
4389 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4390 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
4391
4392# if PGM_GST_TYPE == PGM_TYPE_PAE
4393 /** @todo currently broken; crashes below somewhere */
4394 AssertFailed();
4395# endif
4396
4397# if PGM_GST_TYPE == PGM_TYPE_32BIT \
4398 || PGM_GST_TYPE == PGM_TYPE_PAE \
4399 || PGM_GST_TYPE == PGM_TYPE_AMD64
4400
4401 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
4402 PPGMCPU pPGM = &pVCpu->pgm.s;
4403 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
4404 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
4405# ifndef IN_RING0
4406 RTHCPHYS HCPhys; /* general usage. */
4407# endif
4408 int rc;
4409
4410 /*
4411 * Check that the Guest CR3 and all its mappings are correct.
4412 */
4413 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
4414 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
4415 false);
4416# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
4417# if 0
4418# if PGM_GST_TYPE == PGM_TYPE_32BIT
4419 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
4420# else
4421 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
4422# endif
4423 AssertRCReturn(rc, 1);
4424 HCPhys = NIL_RTHCPHYS;
4425 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
4426 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
4427# endif
4428# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
4429 pgmGstGet32bitPDPtr(pVCpu);
4430 RTGCPHYS GCPhys;
4431 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
4432 AssertRCReturn(rc, 1);
4433 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
4434# endif
4435# endif /* !IN_RING0 */
4436
4437 /*
4438 * Get and check the Shadow CR3.
4439 */
4440# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4441 unsigned cPDEs = X86_PG_ENTRIES;
4442 unsigned cIncrement = X86_PG_ENTRIES * GUEST_PAGE_SIZE;
4443# elif PGM_SHW_TYPE == PGM_TYPE_PAE
4444# if PGM_GST_TYPE == PGM_TYPE_32BIT
4445 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
4446# else
4447 unsigned cPDEs = X86_PG_PAE_ENTRIES;
4448# endif
4449 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
4450# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
4451 unsigned cPDEs = X86_PG_PAE_ENTRIES;
4452 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
4453# endif
4454 if (cb != ~(RTGCPTR)0)
4455 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
4456
4457/** @todo call the other two PGMAssert*() functions. */
4458
4459# if PGM_GST_TYPE == PGM_TYPE_AMD64
4460 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
4461
4462 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
4463 {
4464 PX86PML4E const pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
4465 AssertContinueStmt(pPml4eSrc, cErrors++);
4466
4467 PX86PML4E const pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
4468 AssertContinueStmt(pPml4eDst, cErrors++);
4469
4470 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
4471 if (!(pPml4eDst->u & X86_PML4E_P))
4472 {
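 /* Shadow PML4E not present: skip the 512 GB of address space it covers (512 * 512 * 2 MB). */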
4473 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4474 continue;
4475 }
4476
4477 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
4478 RTGCPHYS GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
4479
4480 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
4481 {
4482 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
4483 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4484 cErrors++;
4485 continue;
4486 }
4487
4488 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
4489 {
4490 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
4491 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4492 cErrors++;
4493 continue;
4494 }
4495
4496 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
4497 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
4498 {
4499 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
4500 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4501 cErrors++;
4502 continue;
4503 }
4504# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
4505 {
4506# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
4507
4508# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
4509 /*
4510 * Check the PDPTEs too.
4511 */
4512 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
4513
4514 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
4515 {
4516 unsigned iPDSrc = 0; /* initialized to shut up gcc */
4517 PPGMPOOLPAGE pShwPde = NULL;
4518 PX86PDPE pPdpeDst;
4519 RTGCPHYS GCPhysPdeSrc;
4520 X86PDPE PdpeSrc;
4521 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
4522# if PGM_GST_TYPE == PGM_TYPE_PAE
4523 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
4524 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
4525# else
4526 PX86PML4E pPml4eSrcIgn;
4527 PX86PDPT pPdptDst;
4528 PX86PDPAE pPDDst;
4529 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
4530
4531 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
4532 if (rc != VINF_SUCCESS)
4533 {
4534 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
4535 GCPtr += 512 * _2M;
4536 continue; /* next PDPTE */
4537 }
4538 Assert(pPDDst);
4539# endif
4540 Assert(iPDSrc == 0);
4541
4542 pPdpeDst = &pPdptDst->a[iPdpt];
4543
4544 if (!(pPdpeDst->u & X86_PDPE_P))
4545 {
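 /* Shadow PDPT entry not present: skip the 1 GB it covers (512 * 2 MB). */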
4546 GCPtr += 512 * _2M;
4547 continue; /* next PDPTE */
4548 }
4549
4550 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
4551 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
4552
4553 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
4554 {
4555 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4556 GCPtr += 512 * _2M;
4557 cErrors++;
4558 continue;
4559 }
4560
4561 if (GCPhysPdeSrc != pShwPde->GCPhys)
4562 {
4563# if PGM_GST_TYPE == PGM_TYPE_AMD64
4564 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4565# else
4566 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4567# endif
4568 GCPtr += 512 * _2M;
4569 cErrors++;
4570 continue;
4571 }
4572
4573# if PGM_GST_TYPE == PGM_TYPE_AMD64
4574 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
4575 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
4576 {
4577 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4578 GCPtr += 512 * _2M;
4579 cErrors++;
4580 continue;
4581 }
4582# endif
4583
4584# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4585 {
4586# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4587# if PGM_GST_TYPE == PGM_TYPE_32BIT
4588 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
4589# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4590 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
4591# endif
4592# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
4593 /*
4594 * Iterate the shadow page directory.
4595 */
4596 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
4597 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
4598
4599 for (;
4600 iPDDst < cPDEs;
4601 iPDDst++, GCPtr += cIncrement)
4602 {
4603# if PGM_SHW_TYPE == PGM_TYPE_PAE
4604 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
4605# else
4606 const SHWPDE PdeDst = pPDDst->a[iPDDst];
4607# endif
4608 if ( (PdeDst.u & X86_PDE_P)
4609 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
4610 {
4611 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4612 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
4613 if (!pPoolPage)
4614 {
4615 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4616 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4617 cErrors++;
4618 continue;
4619 }
4620 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
4621
4622 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4623 {
4624 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4625 GCPtr, (uint64_t)PdeDst.u));
4626 cErrors++;
4627 }
4628
4629 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4630 {
4631 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4632 GCPtr, (uint64_t)PdeDst.u));
4633 cErrors++;
4634 }
4635
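 /* Fetch the corresponding guest PDE; for a 32-bit guest on a PAE shadow two shadow PDEs map onto one guest PDE, otherwise the mapping is 1:1. */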
4636 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4637 if (!(PdeSrc.u & X86_PDE_P))
4638 {
4639 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4640 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4641 cErrors++;
4642 continue;
4643 }
4644
4645 if ( !(PdeSrc.u & X86_PDE_PS)
4646 || !fBigPagesSupported)
4647 {
4648 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
4649# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4650 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
4651# endif
4652 }
4653 else
4654 {
4655# if PGM_GST_TYPE == PGM_TYPE_32BIT
4656 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4657 {
4658 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4659 GCPtr, (uint64_t)PdeSrc.u));
4660 cErrors++;
4661 continue;
4662 }
4663# endif
4664 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
4665# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4666 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
4667# endif
4668 }
4669
4670 if ( pPoolPage->enmKind
4671 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4672 {
4673 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4674 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4675 cErrors++;
4676 }
4677
4678 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4679 if (!pPhysPage)
4680 {
4681 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4682 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4683 cErrors++;
4684 continue;
4685 }
4686
4687 if (GCPhysGst != pPoolPage->GCPhys)
4688 {
4689 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4690 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4691 cErrors++;
4692 continue;
4693 }
4694
4695 if ( !(PdeSrc.u & X86_PDE_PS)
4696 || !fBigPagesSupported)
4697 {
4698 /*
4699 * Page Table.
4700 */
4701 const GSTPT *pPTSrc;
4702 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(GUEST_PAGE_SIZE - 1)),
4703 &pPTSrc);
4704 if (RT_FAILURE(rc))
4705 {
4706 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4707 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4708 cErrors++;
4709 continue;
4710 }
4711 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4712 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4713 {
4714 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4715 // (This problem will go away when/if we shadow multiple CR3s.)
4716 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4717 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4718 cErrors++;
4719 continue;
4720 }
4721 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4722 {
4723 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4724 GCPtr, (uint64_t)PdeDst.u));
4725 cErrors++;
4726 continue;
4727 }
4728
4729 /* iterate the page table. */
4730# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
 4731 /* Select the right half of the guest page table: one 32-bit guest PT (1024 entries) is shadowed by two 512-entry PAE page tables. */
4732 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4733# else
4734 const unsigned offPTSrc = 0;
4735# endif
4736 for (unsigned iPT = 0, off = 0;
4737 iPT < RT_ELEMENTS(pPTDst->a);
4738 iPT++, off += GUEST_PAGE_SIZE)
4739 {
4740 const SHWPTE PteDst = pPTDst->a[iPT];
4741
4742 /* skip not-present and dirty tracked entries. */
4743 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4744 continue;
4745 Assert(SHW_PTE_IS_P(PteDst));
4746
4747 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4748 if (!(PteSrc.u & X86_PTE_P))
4749 {
4750# ifdef IN_RING3
4751 PGMAssertHandlerAndFlagsInSync(pVM);
4752 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
4753 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
4754 0, 0, UINT64_MAX, 99, NULL);
4755# endif
4756 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4757 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4758 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
4759 cErrors++;
4760 continue;
4761 }
4762
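 /* Bits that are not required to match between the guest and shadow PTE: physical address, AVL, global, dirty and caching/PAT bits (the accessed bit is added just below). */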
4763 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4764# if 1 /** @todo sync accessed bit properly... */
4765 fIgnoreFlags |= X86_PTE_A;
4766# endif
4767
4768 /* match the physical addresses */
4769 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4770 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
4771
4772# ifdef IN_RING3
4773 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4774 if (RT_FAILURE(rc))
4775 {
4776# if 0
4777 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4778 {
4779 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4780 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4781 cErrors++;
4782 continue;
4783 }
4784# endif
4785 }
4786 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4787 {
4788 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4789 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4790 cErrors++;
4791 continue;
4792 }
4793# endif
4794
4795 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4796 if (!pPhysPage)
4797 {
4798# if 0
4799 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4800 {
4801 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4802 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4803 cErrors++;
4804 continue;
4805 }
4806# endif
4807 if (SHW_PTE_IS_RW(PteDst))
4808 {
4809 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4810 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4811 cErrors++;
4812 }
4813 fIgnoreFlags |= X86_PTE_RW;
4814 }
4815 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4816 {
4817 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4818 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4819 cErrors++;
4820 continue;
4821 }
4822
4823 /* flags */
4824 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage) && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPhysPage))
4825 {
4826 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4827 {
4828 if (SHW_PTE_IS_RW(PteDst))
4829 {
4830 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4831 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4832 cErrors++;
4833 continue;
4834 }
4835 fIgnoreFlags |= X86_PTE_RW;
4836 }
4837 else
4838 {
4839 if ( SHW_PTE_IS_P(PteDst)
4840# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4841 && !PGM_PAGE_IS_MMIO(pPhysPage)
4842# endif
4843 )
4844 {
4845 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4846 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4847 cErrors++;
4848 continue;
4849 }
4850 fIgnoreFlags |= X86_PTE_P;
4851 }
4852 }
4853 else
4854 {
4855 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
4856 {
4857 if (SHW_PTE_IS_RW(PteDst))
4858 {
4859 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4860 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4861 cErrors++;
4862 continue;
4863 }
4864 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4865 {
4866 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4867 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4868 cErrors++;
4869 continue;
4870 }
4871 if (SHW_PTE_IS_D(PteDst))
4872 {
4873 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4874 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4875 cErrors++;
4876 }
4877# if 0 /** @todo sync access bit properly... */
4878 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4879 {
 4880 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4881 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4882 cErrors++;
4883 }
4884 fIgnoreFlags |= X86_PTE_RW;
4885# else
4886 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4887# endif
4888 }
4889 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4890 {
4891 /* access bit emulation (not implemented). */
4892 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
4893 {
4894 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4895 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4896 cErrors++;
4897 continue;
4898 }
4899 if (!SHW_PTE_IS_A(PteDst))
4900 {
 4901 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4902 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4903 cErrors++;
4904 }
4905 fIgnoreFlags |= X86_PTE_P;
4906 }
4907# ifdef DEBUG_sandervl
4908 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4909# endif
4910 }
4911
4912 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4913 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4914 )
4915 {
4916 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4917 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4918 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4919 cErrors++;
4920 continue;
4921 }
4922 } /* foreach PTE */
4923 }
4924 else
4925 {
4926 /*
4927 * Big Page.
4928 */
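 /* The guest uses a 2/4 MB page here while the shadow still uses a 4 KB page table, so the guest PDE is checked against the shadow PDE flags and against every shadow PTE below. */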
4929 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4930 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
4931 {
4932 if (PdeDst.u & X86_PDE_RW)
4933 {
4934 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4935 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4936 cErrors++;
4937 continue;
4938 }
4939 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4940 {
 4941 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4942 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4943 cErrors++;
4944 continue;
4945 }
4946# if 0 /** @todo sync access bit properly... */
4947 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4948 {
 4949 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4950 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4951 cErrors++;
4952 }
4953 fIgnoreFlags |= X86_PTE_RW;
4954# else
4955 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4956# endif
4957 }
4958 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4959 {
4960 /* access bit emulation (not implemented). */
4961 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
4962 {
4963 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4964 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4965 cErrors++;
4966 continue;
4967 }
4968 if (!SHW_PDE_IS_A(PdeDst))
4969 {
 4970 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4971 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4972 cErrors++;
4973 }
4974 fIgnoreFlags |= X86_PTE_P;
4975 }
4976
4977 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4978 {
4979 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4980 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4981 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4982 cErrors++;
4983 }
4984
4985 /* iterate the page table. */
4986 for (unsigned iPT = 0, off = 0;
4987 iPT < RT_ELEMENTS(pPTDst->a);
4988 iPT++, off += GUEST_PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + GUEST_PAGE_SIZE))
4989 {
4990 const SHWPTE PteDst = pPTDst->a[iPT];
4991
4992 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4993 {
4994 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4995 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4996 cErrors++;
4997 }
4998
4999 /* skip not-present entries. */
5000 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
5001 continue;
5002
5003 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
5004
5005 /* match the physical addresses */
5006 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
5007
5008# ifdef IN_RING3
5009 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
5010 if (RT_FAILURE(rc))
5011 {
5012# if 0
5013 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
5014 {
5015 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
5016 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5017 cErrors++;
5018 }
5019# endif
5020 }
5021 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
5022 {
5023 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
5024 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5025 cErrors++;
5026 continue;
5027 }
5028# endif
5029 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
5030 if (!pPhysPage)
5031 {
5032# if 0 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
5033 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
5034 {
5035 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
5036 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5037 cErrors++;
5038 continue;
5039 }
5040# endif
5041 if (SHW_PTE_IS_RW(PteDst))
5042 {
5043 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
5044 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5045 cErrors++;
5046 }
5047 fIgnoreFlags |= X86_PTE_RW;
5048 }
5049 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
5050 {
5051 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
5052 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5053 cErrors++;
5054 continue;
5055 }
5056
5057 /* flags */
5058 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
5059 {
5060 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
5061 {
5062 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
5063 {
5064 if ( SHW_PTE_IS_RW(PteDst)
5065 && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPhysPage))
5066 {
5067 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
5068 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5069 cErrors++;
5070 continue;
5071 }
5072 fIgnoreFlags |= X86_PTE_RW;
5073 }
5074 }
5075 else
5076 {
5077 if ( SHW_PTE_IS_P(PteDst)
5078 && !PGM_PAGE_IS_HNDL_PHYS_NOT_IN_HM(pPhysPage)
5079# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
5080 && !PGM_PAGE_IS_MMIO(pPhysPage)
5081# endif
5082 )
5083 {
5084 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
5085 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5086 cErrors++;
5087 continue;
5088 }
5089 fIgnoreFlags |= X86_PTE_P;
5090 }
5091 }
5092
5093 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
5094 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
5095 )
5096 {
5097 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
5098 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
5099 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
5100 cErrors++;
5101 continue;
5102 }
5103 } /* for each PTE */
5104 }
5105 }
5106 /* not present */
5107
5108 } /* for each PDE */
5109
5110 } /* for each PDPTE */
5111
5112 } /* for each PML4E */
5113
5114# ifdef DEBUG
5115 if (cErrors)
5116 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
5117# endif
5118# endif /* GST is in {32BIT, PAE, AMD64} */
5119 return cErrors;
5120#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
5121}
5122#endif /* VBOX_STRICT */
5123
5124
5125/**
 5126 * Sets up the CR3 for shadow paging.
5127 *
5128 * @returns Strict VBox status code.
5129 * @retval VINF_SUCCESS.
5130 *
5131 * @param pVCpu The cross context virtual CPU structure.
5132 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
5133 * already applied.)
5134 */
5135PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
5136{
5137 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
5138 int rc = VINF_SUCCESS;
5139
5140 /* Update guest paging info. */
5141#if PGM_GST_TYPE == PGM_TYPE_32BIT \
5142 || PGM_GST_TYPE == PGM_TYPE_PAE \
5143 || PGM_GST_TYPE == PGM_TYPE_AMD64
5144
5145 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
5146 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
5147
5148# if PGM_GST_TYPE == PGM_TYPE_PAE
5149 if ( !pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped)
5150 || pVCpu->pgm.s.GCPhysPaeCR3 != GCPhysCR3)
5151# endif
5152 {
5153 /*
5154 * Map the page CR3 points at.
5155 */
5156 RTHCPTR HCPtrGuestCR3;
5157 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
5158 if (RT_SUCCESS(rc))
5159 {
5160# if PGM_GST_TYPE == PGM_TYPE_32BIT
5161# ifdef IN_RING3
5162 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
5163 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
5164# else
5165 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
5166 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
5167# endif
5168
5169# elif PGM_GST_TYPE == PGM_TYPE_PAE
5170# ifdef IN_RING3
5171 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
5172 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
5173# else
5174 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
5175 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
5176# endif
5177
5178 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
5179#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5180 /*
5181 * When EPT is enabled by the nested-hypervisor and the nested-guest is in PAE mode,
5182 * the guest-CPU context would've already been updated with the 4 PAE PDPEs specified
5183 * in the virtual VMCS. The PDPEs can differ from those in guest memory referenced by
5184 * the translated nested-guest CR3. We -MUST- use the PDPEs provided in the virtual VMCS
5185 * rather than those in guest memory.
5186 *
5187 * See Intel spec. 26.3.2.4 "Loading Page-Directory-Pointer-Table Entries".
5188 */
5189 if (pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_EPT)
5190 CPUMGetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
5191 else
5192#endif
5193 {
5194 /* Update CPUM with the PAE PDPEs referenced by CR3. */
5195 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
5196 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
5197 }
5198
5199 /*
5200 * Map the 4 PAE PDPEs.
5201 */
5202 rc = PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
5203 if (RT_SUCCESS(rc))
5204 {
5205# ifdef IN_RING3
5206 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = true;
5207 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
5208# else
5209 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
5210 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = true;
5211# endif
5212 pVCpu->pgm.s.GCPhysPaeCR3 = GCPhysCR3;
5213 }
5214
5215# elif PGM_GST_TYPE == PGM_TYPE_AMD64
5216# ifdef IN_RING3
5217 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
5218 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
5219# else
5220 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
5221 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
5222# endif
5223# endif
5224 }
5225 else
5226 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
5227 }
5228#endif
5229
5230 /*
5231 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
5232 */
5233# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
5234 || PGM_SHW_TYPE == PGM_TYPE_PAE \
5235 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
5236 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
5237 && PGM_GST_TYPE != PGM_TYPE_PROT))
5238
5239 Assert(!pVM->pgm.s.fNestedPaging);
5240 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
5241
5242 /*
5243 * Update the shadow root page as well since that's not fixed.
5244 */
5245 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5246 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
5247 PPGMPOOLPAGE pNewShwPageCR3;
5248
5249 PGM_LOCK_VOID(pVM);
5250
5251# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5252 if (pPool->cDirtyPages)
5253 pgmPoolResetDirtyPages(pVM);
5254# endif
5255
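 /* Allocate the new shadow CR3 root page from the pool for the guest CR3 address, locking it so the pool cannot flush it. */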
5256 Assert(!(GCPhysCR3 >> (GUEST_PAGE_SHIFT + 32))); /** @todo what is this for? */
5257 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
5258 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
5259 AssertFatalRC(rc2);
5260
5261 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
5262 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
5263
5264 /* Set the current hypervisor CR3. */
5265 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
5266
5267 /* Clean up the old CR3 root. */
5268 if ( pOldShwPageCR3
5269 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
5270 {
5271 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
5272
5273 /* Mark the page as unlocked; allow flushing again. */
5274 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
5275
5276 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
5277 }
5278 PGM_UNLOCK(pVM);
5279# else
5280 NOREF(GCPhysCR3);
5281# endif
5282
5283 return rc;
5284}
5285
5286/**
5287 * Unmaps the shadow CR3.
5288 *
5289 * @returns VBox status, no specials.
5290 * @param pVCpu The cross context virtual CPU structure.
5291 */
5292PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
5293{
5294 LogFlow(("UnmapCR3\n"));
5295
5296 int rc = VINF_SUCCESS;
5297 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
5298
5299 /*
5300 * Update guest paging info.
5301 */
5302#if PGM_GST_TYPE == PGM_TYPE_32BIT
5303 pVCpu->pgm.s.pGst32BitPdR3 = 0;
5304 pVCpu->pgm.s.pGst32BitPdR0 = 0;
5305
5306#elif PGM_GST_TYPE == PGM_TYPE_PAE
5307 pVCpu->pgm.s.pGstPaePdptR3 = 0;
5308 pVCpu->pgm.s.pGstPaePdptR0 = 0;
5309 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
5310 {
5311 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
5312 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
5313 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
5314 }
5315
5316#elif PGM_GST_TYPE == PGM_TYPE_AMD64
5317 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
5318 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
5319
5320#else /* prot/real mode stub */
5321 /* nothing to do */
5322#endif
5323
5324 /*
5325 * PAE PDPEs (and CR3) might have been mapped via PGMGstMapPaePdpesAtCr3()
5326 * prior to switching to PAE in pfnMapCr3(), so we need to clear them here.
5327 */
5328 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
5329 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
5330 pVCpu->pgm.s.GCPhysPaeCR3 = NIL_RTGCPHYS;
5331
5332 /*
5333 * Update shadow paging info.
5334 */
5335#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
5336 || PGM_SHW_TYPE == PGM_TYPE_PAE \
5337 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
5338# if PGM_GST_TYPE != PGM_TYPE_REAL
5339 Assert(!pVM->pgm.s.fNestedPaging);
5340# endif
5341 PGM_LOCK_VOID(pVM);
5342
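 /* If a shadow CR3 root page is currently active, unlock it and free it back to the pool. */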
5343 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
5344 {
5345 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5346
5347# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5348 if (pPool->cDirtyPages)
5349 pgmPoolResetDirtyPages(pVM);
5350# endif
5351
5352 /* Mark the page as unlocked; allow flushing again. */
5353 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
5354
5355 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
5356 pVCpu->pgm.s.pShwPageCR3R3 = 0;
5357 pVCpu->pgm.s.pShwPageCR3R0 = 0;
5358 }
5359
5360 PGM_UNLOCK(pVM);
5361#endif
5362
5363 return rc;
5364}
5365