VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@96979

Last change on this file since 96979 was 96979, checked in by vboxsync, 2 years ago

VMM/PGM,IEM,HM: Added a PGMPHYSHANDLER_F_NOT_IN_HM flag to better deal with a nested APIC access page. bugref:10092

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 222.0 KB
1/* $Id: PGMAllPool.cpp 96979 2022-10-04 12:46:05Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/disopcode.h>
42#include <VBox/vmm/hm_vmx.h>
43
44#include <VBox/log.h>
45#include <VBox/err.h>
46#include <iprt/asm.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Internal Functions *
52*********************************************************************************************************************************/
53RT_C_DECLS_BEGIN
54#if 0 /* unused */
55DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
56DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
57#endif /* unused */
58static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
63static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
64#endif
65#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
66static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
67#endif
68
69int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
70PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
71void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
72void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
73
74RT_C_DECLS_END
75
76
77#if 0 /* unused */
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96#endif /* unused */
97
98
99/**
100 * Flushes a chain of pages sharing the same access monitor.
101 *
102 * @param pPool The pool.
103 * @param pPage A page in the chain.
104 */
105void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
106{
107 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
108
109 /*
110 * Find the list head.
111 */
112 uint16_t idx = pPage->idx;
113 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
114 {
115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
116 {
117 idx = pPage->iMonitoredPrev;
118 Assert(idx != pPage->idx);
119 pPage = &pPool->aPages[idx];
120 }
121 }
122
123 /*
124 * Iterate the list flushing each shadow page.
125 */
126 for (;;)
127 {
128 idx = pPage->iMonitoredNext;
129 Assert(idx != pPage->idx);
130 if (pPage->idx >= PGMPOOL_IDX_FIRST)
131 {
132 int rc2 = pgmPoolFlushPage(pPool, pPage);
133 AssertRC(rc2);
134 }
135 /* next */
136 if (idx == NIL_PGMPOOL_IDX)
137 break;
138 pPage = &pPool->aPages[idx];
139 }
140}
141
142
143/**
144 * Wrapper for getting the current context pointer to the entry being modified.
145 *
146 * @returns VBox status code suitable for scheduling.
147 * @param pVM The cross context VM structure.
148 * @param pvDst Destination address
149 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
150 * on the context (e.g. \#PF in R0 & RC).
151 * @param GCPhysSrc The source guest physical address.
152 * @param cb Size of data to read
153 */
154DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
155{
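    /* Note: cb is the (power-of-two) size of one page table entry, so masking with
       ~(cb - 1) aligns the source down to the start of the entry being modified.
       In ring-3 we can read directly through the existing mapping; in other contexts
       we go through the guest physical address instead. */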
156#if defined(IN_RING3)
157 NOREF(pVM); NOREF(GCPhysSrc);
158 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
159 return VINF_SUCCESS;
160#else
161 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
162 NOREF(pvSrc);
163 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
164#endif
165}
166
167
168/**
169 * Process shadow entries before they are changed by the guest.
170 *
171 * For PT entries we will clear them. For PD entries, we'll simply check
172 * for mapping conflicts and set the SyncCR3 FF if found.
173 *
174 * @param pVCpu The cross context virtual CPU structure.
175 * @param pPool The pool.
176 * @param pPage The head page.
177 * @param GCPhysFault The guest physical fault address.
178 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
179 * depending on the context (e.g. \#PF in R0 & RC).
180 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
181 */
182static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
183 void const *pvAddress, unsigned cbWrite)
184{
185 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
186 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
187 PVMCC pVM = pPool->CTX_SUFF(pVM);
188 NOREF(pVCpu);
189
190 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
191 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
192
193 if (PGMPOOL_PAGE_IS_NESTED(pPage))
194 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
195
196 for (;;)
197 {
198 union
199 {
200 void *pv;
201 PX86PT pPT;
202 PPGMSHWPTPAE pPTPae;
203 PX86PD pPD;
204 PX86PDPAE pPDPae;
205 PX86PDPT pPDPT;
206 PX86PML4 pPML4;
207#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
208 PEPTPDPT pEptPdpt;
209 PEPTPD pEptPd;
210 PEPTPT pEptPt;
211#endif
212 } uShw;
213
214 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
215 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
216
217 uShw.pv = NULL;
218 switch (pPage->enmKind)
219 {
220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
221 {
222 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
223 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
224 const unsigned iShw = off / sizeof(X86PTE);
225 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
226 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
227 if (uPde & X86_PTE_P)
228 {
229 X86PTE GstPte;
230 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
231 AssertRC(rc);
232 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
233 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
234 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
235 }
236 break;
237 }
238
239 /* page/2 sized */
240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
241 {
242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
243 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
244 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
245 {
246 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
247 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
248 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
249 {
250 X86PTE GstPte;
251 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
252 AssertRC(rc);
253
254 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
255 pgmPoolTracDerefGCPhysHint(pPool, pPage,
256 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
257 GstPte.u & X86_PTE_PG_MASK,
258 iShw);
259 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
260 }
261 }
262 break;
263 }
264
265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
269 {
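    /* A 32-bit guest PD has 1024 entries covering 4 MB each, while a shadow PAE PD
       has 512 entries covering 2 MB each.  The guest PD is therefore shadowed by
       four PAE PDs: iShwPdpt picks the shadow PD, and each guest PDE maps to two
       shadow PDEs, hence the '* 2'. */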
270 unsigned iGst = off / sizeof(X86PDE);
271 unsigned iShwPdpt = iGst / 256;
272 unsigned iShw = (iGst % 256) * 2;
273 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
274
275 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
277 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
278 {
279 for (unsigned i = 0; i < 2; i++)
280 {
281 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
282 if (uPde & X86_PDE_P)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
285 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
286 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
287 }
288
289 /* paranoia / a bit assumptive. */
290 if ( (off & 3)
291 && (off & 3) + cbWrite > 4)
292 {
293 const unsigned iShw2 = iShw + 2 + i;
294 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
295 {
296 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
297 if (uPde2 & X86_PDE_P)
298 {
299 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
300 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
301 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
302 }
303 }
304 }
305 }
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
311 {
312 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
313 const unsigned iShw = off / sizeof(X86PTEPAE);
314 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
315 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
316 {
317 X86PTEPAE GstPte;
318 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
319 AssertRC(rc);
320
321 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
324 GstPte.u & X86_PTE_PAE_PG_MASK,
325 iShw);
326 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 7)
331 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
332 {
333 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
334 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
335
336 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
337 {
338 X86PTEPAE GstPte;
339 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
340 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
341 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
342 AssertRC(rc);
343 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
344 pgmPoolTracDerefGCPhysHint(pPool, pPage,
345 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
346 GstPte.u & X86_PTE_PAE_PG_MASK,
347 iShw2);
348 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
349 }
350 }
351 break;
352 }
353
354 case PGMPOOLKIND_32BIT_PD:
355 {
356 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
357 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
358
359 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
360 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
361 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
362 if (uPde & X86_PDE_P)
363 {
364 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
365 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
366 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
367 }
368
369 /* paranoia / a bit assumptive. */
370 if ( (off & 3)
371 && (off & 3) + cbWrite > sizeof(X86PTE))
372 {
373 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
374 if ( iShw2 != iShw
375 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
376 {
377 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
378 if (uPde2 & X86_PDE_P)
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
381 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
382 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
383 }
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
387 if ( uShw.pPD->a[iShw].n.u1Present
388 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
391 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394#endif
395 break;
396 }
397
398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
399 {
400 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
401 const unsigned iShw = off / sizeof(X86PDEPAE);
402 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
403
404 /*
405 * Causes trouble when the guest uses a PDE to refer to the whole page table level
406 * structure. (Invalidate here; faults later on when it tries to change the page
407 * table entries -> recheck; probably only applies to the RC case.)
408 */
409 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
410 if (uPde & X86_PDE_P)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
413 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
414 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
415 }
416
417 /* paranoia / a bit assumptive. */
418 if ( (off & 7)
419 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
420 {
421 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
422 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
423
424 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
425 if (uPde2 & X86_PDE_P)
426 {
427 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
428 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
429 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
430 }
431 }
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PDPT:
436 {
437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
438 /*
439 * Hopefully this doesn't happen very often:
440 * - touching unused parts of the page
441 * - messing with the bits of pd pointers without changing the physical address
442 */
443 /* PDPT roots are not page aligned; 32 byte only! */
444 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
445
446 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
447 const unsigned iShw = offPdpt / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
451 if (uPdpe & X86_PDPE_P)
452 {
453 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
454 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
455 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( (offPdpt & 7)
460 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
461 {
462 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
463 if ( iShw2 != iShw
464 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
465 {
466 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
467 if (uPdpe2 & X86_PDPE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
470 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
485 if (uPde & X86_PDE_P)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
488 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
489 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
490 }
491
492 /* paranoia / a bit assumptive. */
493 if ( (off & 7)
494 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
495 {
496 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
497 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
498 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
499 if (uPde2 & X86_PDE_P)
500 {
501 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
502 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - messing with the bits of pd pointers without changing the physical address
515 */
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PDPE);
518 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
519 if (uPdpe & X86_PDPE_P)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
522 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
523 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
524 }
525 /* paranoia / a bit assumptive. */
526 if ( (off & 7)
527 && (off & 7) + cbWrite > sizeof(X86PDPE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
530 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
531 if (uPdpe2 & X86_PDPE_P)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
534 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
536 }
537 }
538 break;
539 }
540
541 case PGMPOOLKIND_64BIT_PML4:
542 {
543 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
544 /*
545 * Hopefully this doesn't happen very often:
546 * - messing with the bits of pd pointers without changing the physical address
547 */
548 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
549 const unsigned iShw = off / sizeof(X86PDPE);
550 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
551 if (uPml4e & X86_PML4E_P)
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
554 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
555 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
556 }
557 /* paranoia / a bit assumptive. */
558 if ( (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
562 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
563 if (uPml4e2 & X86_PML4E_P)
564 {
565 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
566 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
567 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
568 }
569 }
570 break;
571 }
572
573#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
574 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
575 {
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(EPTPML4E);
578 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
579 if (uPml4e & EPT_PRESENT_MASK)
580 {
581 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
582 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
583 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
584 }
585
586 /* paranoia / a bit assumptive. */
587 if ( (off & 7)
588 && (off & 7) + cbWrite > sizeof(X86PML4E))
589 {
590 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
591 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
592 if (uPml4e2 & EPT_PRESENT_MASK)
593 {
594 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
595 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
596 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
597 }
598 }
599 break;
600 }
601
602 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
603 {
604 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
605 const unsigned iShw = off / sizeof(EPTPDPTE);
606 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
607 if (uPdpte & EPT_PRESENT_MASK)
608 {
609 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
610 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
611 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
612 }
613
614 /* paranoia / a bit assumptive. */
615 if ( (off & 7)
616 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
617 {
618 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
619 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
620 if (uPdpte2 & EPT_PRESENT_MASK)
621 {
622 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
623 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
624 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
625 }
626 }
627 break;
628 }
629
630 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
633 const unsigned iShw = off / sizeof(EPTPDE);
634 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
635 if (uPde & EPT_PRESENT_MASK)
636 {
637 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
638 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
639 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
640 }
641
642 /* paranoia / a bit assumptive. */
643 if ( (off & 7)
644 && (off & 7) + cbWrite > sizeof(EPTPDE))
645 {
646 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
647 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
648 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
649 if (uPde2 & EPT_PRESENT_MASK)
650 {
651 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
652 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
653 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
654 }
655 }
656 break;
657 }
658
659 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
660 {
661 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
662 const unsigned iShw = off / sizeof(EPTPTE);
663 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
664 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
665 if (uPte & EPT_PRESENT_MASK)
666 {
667 EPTPTE GstPte;
668 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
669 AssertRC(rc);
670
671 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
672 pgmPoolTracDerefGCPhysHint(pPool, pPage,
673 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
674 GstPte.u & EPT_PTE_PG_MASK,
675 iShw);
676 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
677 }
678
679 /* paranoia / a bit assumptive. */
680 if ( (off & 7)
681 && (off & 7) + cbWrite > sizeof(EPTPTE))
682 {
683 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
684 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
685 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
686 if (uPte2 & EPT_PRESENT_MASK)
687 {
688 EPTPTE GstPte;
689 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
690 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
691 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
692 AssertRC(rc);
693 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
694 pgmPoolTracDerefGCPhysHint(pPool, pPage,
695 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
696 GstPte.u & EPT_PTE_PG_MASK,
697 iShw2);
698 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
699 }
700 }
701 break;
702 }
703#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
704
705 default:
706 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
707 }
708 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
709
710 /* next */
711 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
712 return;
713 pPage = &pPool->aPages[pPage->iMonitoredNext];
714 }
715}
716
717#ifndef IN_RING3
718
719/**
720 * Checks if an access could be a fork operation in progress.
721 *
722 * Meaning that the guest is setting up the parent process for Copy-On-Write.
723 *
724 * @returns true if it's likely that we're forking, otherwise false.
725 * @param pPool The pool.
726 * @param pDis The disassembled instruction.
727 * @param offFault The access offset.
728 */
729DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
730{
731 /*
732 * i386 linux is using btr to clear X86_PTE_RW.
733 * The functions involved are (2.6.16 source inspection):
734 * clear_bit
735 * ptep_set_wrprotect
736 * copy_one_pte
737 * copy_pte_range
738 * copy_pmd_range
739 * copy_pud_range
740 * copy_page_range
741 * dup_mmap
742 * dup_mm
743 * copy_mm
744 * copy_process
745 * do_fork
746 */
747 if ( pDis->pCurInstr->uOpcode == OP_BTR
748 && !(offFault & 4)
749 /** @todo Validate that the bit index is X86_PTE_RW. */
750 )
751 {
752 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
753 return true;
754 }
755 return false;
756}
757
758
759/**
760 * Determine whether the page is likely to have been reused.
761 *
762 * @returns true if we consider the page as being reused for a different purpose.
763 * @returns false if we consider it to still be a paging page.
764 * @param pVM The cross context VM structure.
765 * @param pVCpu The cross context virtual CPU structure.
766 * @param pRegFrame Trap register frame.
767 * @param pDis The disassembly info for the faulting instruction.
768 * @param pvFault The fault address.
769 * @param pPage The pool page being accessed.
770 *
771 * @remark The REP prefix check is left to the caller because of STOSD/W.
772 */
773DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
774 PPGMPOOLPAGE pPage)
775{
776 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
777 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
778 if (pPage->cLocked)
779 {
780 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
781 return false;
782 }
783
784 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
785 if ( HMHasPendingIrq(pVM)
786 && pRegFrame->rsp - pvFault < 32)
787 {
788 /* Fault caused by stack writes while trying to inject an interrupt event. */
789 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
790 return true;
791 }
792
793 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
794
795 /* Non-supervisor mode write means it's used for something else. */
796 if (CPUMGetGuestCPL(pVCpu) == 3)
797 return true;
798
799 switch (pDis->pCurInstr->uOpcode)
800 {
801 /* call implies the actual push of the return address faulted */
802 case OP_CALL:
803 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
804 return true;
805 case OP_PUSH:
806 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
807 return true;
808 case OP_PUSHF:
809 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
810 return true;
811 case OP_PUSHA:
812 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
813 return true;
814 case OP_FXSAVE:
815 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
816 return true;
817 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
818 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
819 return true;
820 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
821 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
822 return true;
823 case OP_MOVSWD:
824 case OP_STOSWD:
825 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
826 && pRegFrame->rcx >= 0x40
827 )
828 {
829 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
830
831 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
832 return true;
833 }
834 break;
835
836 default:
837 /*
838 * Anything having ESP on the left side means stack writes.
839 */
840 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
841 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
842 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
843 {
844 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
845 return true;
846 }
847 break;
848 }
849
850 /*
851 * Page table updates are very, very unlikely to cross page boundaries,
852 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
853 */
854 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
855 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
856 {
857 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
858 return true;
859 }
860
861 /*
862 * Nobody does an unaligned 8 byte write to a page table, right.
863 */
864 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
865 {
866 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
867 return true;
868 }
869
870 return false;
871}
872
873
874/**
875 * Flushes the page being accessed.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM The cross context VM structure.
879 * @param pVCpu The cross context virtual CPU structure.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 * @todo VBOXSTRICTRC
887 */
888static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
889 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
890{
891 NOREF(pVM); NOREF(GCPhysFault);
892
893 /*
894 * First, do the flushing.
895 */
896 pgmPoolMonitorChainFlush(pPool, pPage);
897
898 /*
899 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
900 * Must do this in raw mode (!); XP boot will fail otherwise.
901 */
902 int rc = VINF_SUCCESS;
903 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
904 if (rc2 == VINF_SUCCESS)
905 { /* do nothing */ }
906 else if (rc2 == VINF_EM_RESCHEDULE)
907 {
908 rc = VBOXSTRICTRC_VAL(rc2);
909# ifndef IN_RING3
910 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
911# endif
912 }
913 else if (rc2 == VERR_EM_INTERPRETER)
914 {
915 rc = VINF_EM_RAW_EMULATE_INSTR;
916 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
917 }
918 else if (RT_FAILURE_NP(rc2))
919 rc = VBOXSTRICTRC_VAL(rc2);
920 else
921 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
922
923 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
924 return rc;
925}
926
927
928/**
929 * Handles the STOSD write accesses.
930 *
931 * @returns VBox status code suitable for scheduling.
932 * @param pVM The cross context VM structure.
933 * @param pPool The pool.
934 * @param pPage The pool page (head).
935 * @param pDis The disassembly of the write instruction.
936 * @param pRegFrame The trap register frame.
937 * @param GCPhysFault The fault address as guest physical address.
938 * @param pvFault The fault address.
939 */
940DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
941 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
942{
943 unsigned uIncrement = pDis->Param1.cb;
944 NOREF(pVM);
945
946 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
947 Assert(pRegFrame->rcx <= 0x20);
948
949# ifdef VBOX_STRICT
950 if (pDis->uOpMode == DISCPUMODE_32BIT)
951 Assert(uIncrement == 4);
952 else
953 Assert(uIncrement == 8);
954# endif
955
956 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
957
958 /*
959 * Increment the modification counter and insert it into the list
960 * of modified pages the first time.
961 */
962 if (!pPage->cModifications++)
963 pgmPoolMonitorModifiedInsert(pPool, pPage);
964
965 /*
966 * Execute REP STOSD.
967 *
968 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
969 * write situation, meaning that it's safe to write here.
970 */
971 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
972 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
973 while (pRegFrame->rcx)
974 {
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
976 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
977 pu32 += uIncrement;
978 GCPhysFault += uIncrement;
979 pRegFrame->rdi += uIncrement;
980 pRegFrame->rcx--;
981 }
982 pRegFrame->rip += pDis->cbInstr;
983
984 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
985 return VINF_SUCCESS;
986}
987
988
989/**
990 * Handles the simple write accesses.
991 *
992 * @returns VBox status code suitable for scheduling.
993 * @param pVM The cross context VM structure.
994 * @param pVCpu The cross context virtual CPU structure.
995 * @param pPool The pool.
996 * @param pPage The pool page (head).
997 * @param pDis The disassembly of the write instruction.
998 * @param pRegFrame The trap register frame.
999 * @param GCPhysFault The fault address as guest physical address.
1000 * @param pvFault The fault address.
1001 * @param pfReused Reused state (in/out)
1002 */
1003DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1004 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1005{
1006 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1007 NOREF(pVM);
1008 NOREF(pfReused); /* initialized by caller */
1009
1010 /*
1011 * Increment the modification counter and insert it into the list
1012 * of modified pages the first time.
1013 */
1014 if (!pPage->cModifications++)
1015 pgmPoolMonitorModifiedInsert(pPool, pPage);
1016
1017 /*
1018 * Clear all the pages. ASSUMES that pvFault is readable.
1019 */
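    /* Larger writes are split into chunks of at most 8 bytes, so that each
       pgmPoolMonitorChainChanging call covers no more than two shadow entries. */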
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
1034
1035 /*
1036 * Interpret the instruction.
1037 */
1038 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1039 if (RT_SUCCESS(rc))
1040 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1041 else if (rc == VERR_EM_INTERPRETER)
1042 {
1043 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1044 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1045 rc = VINF_EM_RAW_EMULATE_INSTR;
1046 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1047 }
1048
1049# if 0 /* experimental code */
1050 if (rc == VINF_SUCCESS)
1051 {
1052 switch (pPage->enmKind)
1053 {
1054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1055 {
1056 X86PTEPAE GstPte;
1057 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1058 AssertRC(rc);
1059
1060 /* Check the new value written by the guest. If present and with a bogus physical address, then
1061 * it's fairly safe to assume the guest is reusing the PT.
1062 */
1063 if (GstPte.n.u1Present)
1064 {
1065 RTHCPHYS HCPhys = -1;
1066 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1067 if (rc != VINF_SUCCESS)
1068 {
1069 *pfReused = true;
1070 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1071 }
1072 }
1073 break;
1074 }
1075 }
1076 }
1077# endif
1078
1079 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1080 return VBOXSTRICTRC_VAL(rc);
1081}
1082
1083
1084/**
1085 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1086 * \#PF access handler callback for page table pages.}
1087 *
1088 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1089 */
1090DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1091 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1092{
1093 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1094 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1095 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1096 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1097 unsigned cMaxModifications;
1098 bool fForcedFlush = false;
1099 RT_NOREF_PV(uErrorCode);
1100
1101# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1102 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1103 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1104# endif
1105 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1106
1107 PGM_LOCK_VOID(pVM);
1108 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1109 {
1110 /* Pool page changed while we were waiting for the lock; ignore. */
1111 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1112 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1113 PGM_UNLOCK(pVM);
1114 return VINF_SUCCESS;
1115 }
1116# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1117 if (pPage->fDirty)
1118 {
1119# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1120 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1121# endif
1122 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1123 PGM_UNLOCK(pVM);
1124 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1125 }
1126# endif
1127
1128# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1129 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1130 {
1131 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1132 void *pvGst;
1133 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1134 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1136 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1137 }
1138# endif
1139
1140# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1141 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1142 {
1143 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1144 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1145 pgmPoolMonitorChainFlush(pPool, pPage);
1146 PGM_UNLOCK(pVM);
1147 return VINF_SUCCESS;
1148 }
1149# endif
1150
1151 /*
1152 * Disassemble the faulting instruction.
1153 */
1154 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1155 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1156 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1157 {
1158 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1159 PGM_UNLOCK(pVM);
1160 return rc;
1161 }
1162
1163 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1164
1165 /*
1166 * We should ALWAYS have the list head as user parameter. This
1167 * is because we use that page to record the changes.
1168 */
1169 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1170
1171# ifdef IN_RING0
1172 /* Maximum nr of modifications depends on the page type. */
1173 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1174 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1175 cMaxModifications = 4;
1176 else
1177 cMaxModifications = 24;
1178# else
1179 cMaxModifications = 48;
1180# endif
1181
1182 /*
1183 * Incremental page table updates should weigh more than random ones.
1184 * (Only applies when started from offset 0)
1185 */
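    /* Heuristic: if this fault hits the address right after the previous one, from
       roughly the same RIP, and the access counter is consecutive, the guest is most
       likely filling the page table sequentially; double the modification count so
       the flush threshold is reached sooner. */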
1186 pVCpu->pgm.s.cPoolAccessHandler++;
1187 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1188 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1189 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1190 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1191 {
1192 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1193 Assert(pPage->cModifications < 32000);
1194 pPage->cModifications = pPage->cModifications * 2;
1195 pPage->GCPtrLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1197 if (pPage->cModifications >= cMaxModifications)
1198 {
1199 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1200 fForcedFlush = true;
1201 }
1202 }
1203
1204 if (pPage->cModifications >= cMaxModifications)
1205 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1206
1207 /*
1208 * Check if it's worth dealing with.
1209 */
1210 bool fReused = false;
1211 bool fNotReusedNotForking = false;
1212 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1213 || pgmPoolIsPageLocked(pPage)
1214 )
1215 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1216 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1217 {
1218 /*
1219 * Simple instructions, no REP prefix.
1220 */
1221 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1222 {
1223 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1224 if (fReused)
1225 goto flushPage;
1226
1227 /* A mov instruction to change the first page table entry will be remembered so we can detect
1228 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1229 */
1230 if ( rc == VINF_SUCCESS
1231 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1232 && pDis->pCurInstr->uOpcode == OP_MOV
1233 && (pvFault & PAGE_OFFSET_MASK) == 0)
1234 {
1235 pPage->GCPtrLastAccessHandlerFault = pvFault;
1236 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1237 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1238 /* Make sure we don't kick out a page too quickly. */
1239 if (pPage->cModifications > 8)
1240 pPage->cModifications = 2;
1241 }
1242 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1243 {
1244 /* ignore the 2nd write to this page table entry. */
1245 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1246 }
1247 else
1248 {
1249 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1250 pPage->GCPtrLastAccessHandlerRip = 0;
1251 }
1252
1253 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1254 PGM_UNLOCK(pVM);
1255 return rc;
1256 }
1257
1258 /*
1259 * Windows is frequently doing small memset() operations (netio test 4k+).
1260 * We have to deal with these or we'll kill the cache and performance.
1261 */
1262 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1263 && !pRegFrame->eflags.Bits.u1DF
1264 && pDis->uOpMode == pDis->uCpuMode
1265 && pDis->uAddrMode == pDis->uCpuMode)
1266 {
1267 bool fValidStosd = false;
1268
1269 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1270 && pDis->fPrefix == DISPREFIX_REP
1271 && pRegFrame->ecx <= 0x20
1272 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1273 && !((uintptr_t)pvFault & 3)
1274 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1275 )
1276 {
1277 fValidStosd = true;
1278 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1279 }
1280 else
1281 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1282 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1283 && pRegFrame->rcx <= 0x20
1284 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1285 && !((uintptr_t)pvFault & 7)
1286 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1287 )
1288 {
1289 fValidStosd = true;
1290 }
1291
1292 if (fValidStosd)
1293 {
1294 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1295 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1296 PGM_UNLOCK(pVM);
1297 return rc;
1298 }
1299 }
1300
1301 /* REP prefix, don't bother. */
1302 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1303 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1304 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1305 fNotReusedNotForking = true;
1306 }
1307
1308# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1309 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1310 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1311 */
1312 if ( pPage->cModifications >= cMaxModifications
1313 && !fForcedFlush
1314 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1315 && ( fNotReusedNotForking
1316 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1317 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1318 )
1319 )
1320 {
1321 Assert(!pgmPoolIsPageLocked(pPage));
1322 Assert(pPage->fDirty == false);
1323
1324 /* Flush any monitored duplicates as we will disable write protection. */
1325 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1326 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1327 {
1328 PPGMPOOLPAGE pPageHead = pPage;
1329
1330 /* Find the monitor head. */
1331 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1332 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1333
1334 while (pPageHead)
1335 {
1336 unsigned idxNext = pPageHead->iMonitoredNext;
1337
1338 if (pPageHead != pPage)
1339 {
1340 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1341 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1342 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1343 AssertRC(rc2);
1344 }
1345
1346 if (idxNext == NIL_PGMPOOL_IDX)
1347 break;
1348
1349 pPageHead = &pPool->aPages[idxNext];
1350 }
1351 }
1352
1353 /* The flushing above might fail for locked pages, so double check. */
1354 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1355 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1356 {
1357 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1358
1359 /* Temporarily allow write access to the page table again. */
1360 rc = PGMHandlerPhysicalPageTempOff(pVM,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1362 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1363 if (rc == VINF_SUCCESS)
1364 {
1365 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1366 AssertMsg(rc == VINF_SUCCESS
1367 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1368 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1369 || rc == VERR_PAGE_NOT_PRESENT,
1370 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1371# ifdef VBOX_STRICT
1372 pPage->GCPtrDirtyFault = pvFault;
1373# endif
1374
1375 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1376 PGM_UNLOCK(pVM);
1377 return rc;
1378 }
1379 }
1380 }
1381# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1382
1383 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1384flushPage:
1385 /*
1386 * Not worth it, so flush it.
1387 *
1388 * If we considered it to be reused, don't go back to ring-3
1389 * to emulate failed instructions since we usually cannot
1390 * interpret them. This may be a bit risky, in which case
1391 * the reuse detection must be fixed.
1392 */
1393 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1394 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1395 && fReused)
1396 {
1397 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1398 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1399 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1400 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1401 }
1402 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1403 PGM_UNLOCK(pVM);
1404 return rc;
1405}
1406
1407#endif /* !IN_RING3 */
1408
1409/**
1410 * @callback_method_impl{FNPGMPHYSHANDLER,
1411 * Access handler for shadowed page table pages.}
1412 *
1413 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1414 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1415 */
1416DECLCALLBACK(VBOXSTRICTRC)
1417pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1418 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1419{
1420 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1421 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1422 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1423 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1424 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1425 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1426
1427 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1428
1429 PGM_LOCK_VOID(pVM);
1430
1431#ifdef VBOX_WITH_STATISTICS
1432 /*
1433 * Collect stats on the access.
1434 */
1435 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1436 if (cbBuf <= 16 && cbBuf > 0)
1437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1438 else if (cbBuf >= 17 && cbBuf < 32)
1439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1440 else if (cbBuf >= 32 && cbBuf < 64)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1442 else if (cbBuf >= 64)
1443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1444
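    /* cbAlign is effectively an alignment mask: entry size minus one (7 for 8-byte
       entries, 3 for the 4-byte legacy 32-bit ones). */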
1445 uint8_t cbAlign;
1446 switch (pPage->enmKind)
1447 {
1448 default:
1449 cbAlign = 7;
1450 break;
1451 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1454 case PGMPOOLKIND_32BIT_PD:
1455 case PGMPOOLKIND_32BIT_PD_PHYS:
1456 cbAlign = 3;
1457 break;
1458 }
1459 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1460 if ((uint8_t)GCPhys & cbAlign)
1461 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1462#endif
1463
1464 /*
1465 * Make sure the pool page wasn't modified by a different CPU.
1466 */
1467 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1468 {
1469 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1470
1471 /* The max modification count before flushing depends on the context and page type. */
1472#ifdef IN_RING3
1473 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1474#else
1475 uint16_t cMaxModifications;
1476 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1477 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1478 cMaxModifications = 4;
1479 else
1480 cMaxModifications = 24;
1481#endif
1482
1483 /*
1484 * We don't have to be very sophisticated about this since there are relatively few calls here.
1485 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1486 */
1487 if ( ( pPage->cModifications < cMaxModifications
1488 || pgmPoolIsPageLocked(pPage) )
1489 && enmOrigin != PGMACCESSORIGIN_DEVICE
1490 && cbBuf <= 16)
1491 {
1492 /* Clear the shadow entry. */
1493 if (!pPage->cModifications++)
1494 pgmPoolMonitorModifiedInsert(pPool, pPage);
1495
1496 if (cbBuf <= 8)
1497 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1498 else
1499 {
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1501 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1502 }
1503 }
1504 else
1505 pgmPoolMonitorChainFlush(pPool, pPage);
1506
1507 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1508 }
1509 else
1510 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1511 PGM_UNLOCK(pVM);
1512 return VINF_PGM_HANDLER_DO_DEFAULT;
1513}
1514
1515
1516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1517
1518# if defined(VBOX_STRICT) && !defined(IN_RING3)
1519
1520/**
1521 * Check references to guest physical memory in a PAE / PAE page table.
1522 *
1523 * @param pPool The pool.
1524 * @param pPage The page.
1525 * @param pShwPT The shadow page table (mapping of the page).
1526 * @param pGstPT The guest page table.
1527 */
1528static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1529{
1530 unsigned cErrors = 0;
1531 int LastRc = -1; /* initialized to shut up gcc */
1532 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1533 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1534 PVMCC pVM = pPool->CTX_SUFF(pVM);
1535
1536# ifdef VBOX_STRICT
1537 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1538 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1539# endif
1540 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1541 {
1542 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1543 {
1544 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1545 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1546 if ( rc != VINF_SUCCESS
1547 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1548 {
1549 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1550 LastPTE = i;
1551 LastRc = rc;
1552 LastHCPhys = HCPhys;
1553 cErrors++;
1554
1555 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1556 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1557 AssertRC(rc);
1558
1559 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1560 {
1561 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1562
1563 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1564 {
1565 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1566
1567 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1568 {
1569 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1570 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1571 {
1572 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1573 }
1574 }
1575
1576 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1577 }
1578 }
1579 }
1580 }
1581 }
1582 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1583}
1584
1585
1586/**
1587 * Check references to guest physical memory in a PAE / 32-bit page table.
1588 *
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 * @param pShwPT The shadow page table (mapping of the page).
1592 * @param pGstPT The guest page table.
1593 */
1594static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1595{
1596 unsigned cErrors = 0;
1597 int LastRc = -1; /* initialized to shut up gcc */
1598 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1599 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1600 PVMCC pVM = pPool->CTX_SUFF(pVM);
1601
1602# ifdef VBOX_STRICT
1603 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1604 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1605# endif
1606 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1607 {
1608 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1609 {
1610 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1611 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1612 if ( rc != VINF_SUCCESS
1613 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1614 {
1615 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1616 LastPTE = i;
1617 LastRc = rc;
1618 LastHCPhys = HCPhys;
1619 cErrors++;
1620
1621 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1622 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1623 AssertRC(rc);
1624
1625 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1626 {
1627 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1628
1629 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1630 {
1631 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1632
1633 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1634 {
1635 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1636 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1637 {
1638 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1639 }
1640 }
1641
1642 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1643 }
1644 }
1645 }
1646 }
1647 }
1648 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1649}
1650
1651# endif /* VBOX_STRICT && !IN_RING3 */
1652
1653/**
1654 * Clear references to guest physical memory in a PAE / PAE page table.
1655 *
1656 * @returns nr of changed PTEs
1657 * @param pPool The pool.
1658 * @param pPage The page.
1659 * @param pShwPT The shadow page table (mapping of the page).
1660 * @param pGstPT The guest page table.
1661 * @param pOldGstPT The old cached guest page table.
1662 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1663 * @param pfFlush Flush reused page table (out)
1664 */
1665DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1666 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1667{
1668 unsigned cChanged = 0;
1669
1670# ifdef VBOX_STRICT
1671 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1672 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1673# endif
1674 *pfFlush = false;
1675
1676 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1677 {
1678 /* Check the new value written by the guest. If present and with a bogus physical address, then
1679 * it's fairly safe to assume the guest is reusing the PT.
1680 */
1681 if ( fAllowRemoval
1682 && (pGstPT->a[i].u & X86_PTE_P))
1683 {
1684 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1685 {
1686 *pfFlush = true;
1687 return ++cChanged;
1688 }
1689 }
1690 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1691 {
1692 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1693 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1694 {
1695# ifdef VBOX_STRICT
1696                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1697 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1698 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1699# endif
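                /* Compare the attribute bits we care about; if they match and the shadow entry
                   is not more writable than the guest entry, the shadow PTE can be kept as-is
                   and no flush is needed for this entry. */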
1700 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1701 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1702 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1703 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1704
1705 if ( uHostAttr == uGuestAttr
1706 && fHostRW <= fGuestRW)
1707 continue;
1708 }
1709 cChanged++;
1710 /* Something was changed, so flush it. */
1711            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1712 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1713 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1714 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1715 }
1716 }
1717 return cChanged;
1718}
1719
1720
1721/**
1722 * Clear references to guest physical memory in a PAE / 32-bit page table.
1723 *
1724 * @returns nr of changed PTEs
1725 * @param pPool The pool.
1726 * @param pPage The page.
1727 * @param pShwPT The shadow page table (mapping of the page).
1728 * @param pGstPT The guest page table.
1729 * @param pOldGstPT The old cached guest page table.
1730 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1731 * @param pfFlush Flush reused page table (out)
1732 */
1733DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1734 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1735{
1736 unsigned cChanged = 0;
1737
1738# ifdef VBOX_STRICT
1739 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1740 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1741# endif
1742 *pfFlush = false;
1743
1744 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1745 {
1746 /* Check the new value written by the guest. If present and with a bogus physical address, then
1747 * it's fairly safe to assume the guest is reusing the PT. */
1748 if (fAllowRemoval)
1749 {
1750 X86PGUINT const uPte = pGstPT->a[i].u;
1751 if ( (uPte & X86_PTE_P)
1752 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1753 {
1754 *pfFlush = true;
1755 return ++cChanged;
1756 }
1757 }
1758 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1759 {
1760 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1761 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1762 {
1763# ifdef VBOX_STRICT
1764                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1765 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1766 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1767# endif
1768 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1769 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1770 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1771 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1772
1773 if ( uHostAttr == uGuestAttr
1774 && fHostRW <= fGuestRW)
1775 continue;
1776 }
1777 cChanged++;
1778 /* Something was changed, so flush it. */
1779            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1780 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1781 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1782 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1783 }
1784 }
1785 return cChanged;
1786}
1787
1788
1789/**
1790 * Flush a dirty page
1791 *
1792 * @param pVM The cross context VM structure.
1793 * @param pPool The pool.
1794 * @param idxSlot Dirty array slot index
1795 * @param fAllowRemoval Allow a reused page table to be removed
1796 */
1797static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1798{
1799 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1800
1801 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1802 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1803 if (idxPage == NIL_PGMPOOL_IDX)
1804 return;
1805
1806 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1807 Assert(pPage->idx == idxPage);
1808 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1809
1810 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1811 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1812
1813 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1814 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1815 Assert(rc == VINF_SUCCESS);
1816 pPage->fDirty = false;
1817
1818# ifdef VBOX_STRICT
1819 uint64_t fFlags = 0;
1820 RTHCPHYS HCPhys;
1821 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1822 AssertMsg( ( rc == VINF_SUCCESS
1823 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1824 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1825 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1826 || rc == VERR_PAGE_NOT_PRESENT,
1827 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1828# endif
1829
1830 /* Flush those PTEs that have changed. */
1831 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1832 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1833 void *pvGst;
1834 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1835 bool fFlush;
1836 unsigned cChanges;
1837
1838 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1839 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1840 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1841 else
1842 {
1843 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1844 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1845 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1846 }
1847
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1850 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1851 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1852
1853 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1854 Assert(pPage->cModifications);
1855 if (cChanges < 4)
1856 pPage->cModifications = 1; /* must use > 0 here */
1857 else
1858 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1859
1860 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
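    /* If the dirty array was completely full, the slot we just flushed becomes the
       next free slot to hand out. */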
1861 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1862 pPool->idxFreeDirtyPage = idxSlot;
1863
1864 pPool->cDirtyPages--;
1865 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867 if (fFlush)
1868 {
1869 Assert(fAllowRemoval);
1870 Log(("Flush reused page table!\n"));
1871 pgmPoolFlushPage(pPool, pPage);
1872 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1873 }
1874 else
1875 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1876}
1877
1878
1879# ifndef IN_RING3
1880/**
1881 * Add a new dirty page
1882 *
1883 * @param pVM The cross context VM structure.
1884 * @param pPool The pool.
1885 * @param pPage The page.
1886 */
1887void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1888{
1889 PGM_LOCK_ASSERT_OWNER(pVM);
1890 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1891 Assert(!pPage->fDirty);
1892 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1893
1894 unsigned idxFree = pPool->idxFreeDirtyPage;
1895 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1896 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1897
1898 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1899 {
1900 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1901 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1902 }
1903 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1904 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1905
1906 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1907
1908 /*
1909 * Make a copy of the guest page table as we require valid GCPhys addresses
1910 * when removing references to physical pages.
1911 * (The HCPhys linear lookup is *extremely* expensive!)
1912 */
1913 void *pvGst;
1914 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
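    /* Presumably only half a page is copied in the 32-bit case because a PAE shadow PT
       covers just 512 guest entries, i.e. 2 KB worth of 32-bit guest PTEs. */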
1915 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1916 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1917# ifdef VBOX_STRICT
1918 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1919 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1920 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1921 else
1922 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1924# endif
1925 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1926
1927 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1928 pPage->fDirty = true;
1929 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1930 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1931 pPool->cDirtyPages++;
1932
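    /* Advance the free index; if that slot is still occupied (and the array is not full),
       scan the whole array for any free slot. */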
1933 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1934 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1935 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1936 {
1937 unsigned i;
1938 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1939 {
1940 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1941 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1942 {
1943 pPool->idxFreeDirtyPage = idxFree;
1944 break;
1945 }
1946 }
1947 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1948 }
1949
1950 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1951
1952 /*
1953 * Clear all references to this shadow table. See @bugref{7298}.
1954 */
1955 pgmPoolTrackClearPageUsers(pPool, pPage);
1956}
1957# endif /* !IN_RING3 */
1958
1959
1960/**
1961 * Check if the specified page is dirty (not write monitored)
1962 *
1963 * @returns Dirty or not.
1964 * @param pVM The cross context VM structure.
1965 * @param GCPhys Guest physical address
1966 */
1967bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1968{
1969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1970 PGM_LOCK_ASSERT_OWNER(pVM);
1971 if (!pPool->cDirtyPages)
1972 return false;
1973
1974 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1975
1976 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1977 {
1978 unsigned idxPage = pPool->aidxDirtyPages[i];
1979 if (idxPage != NIL_PGMPOOL_IDX)
1980 {
1981 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1982 if (pPage->GCPhys == GCPhys)
1983 return true;
1984 }
1985 }
1986 return false;
1987}
1988
1989
1990/**
1991 * Reset all dirty pages by reinstating page monitoring.
1992 *
1993 * @param pVM The cross context VM structure.
1994 */
1995void pgmPoolResetDirtyPages(PVMCC pVM)
1996{
1997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1998 PGM_LOCK_ASSERT_OWNER(pVM);
1999 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2000
2001 if (!pPool->cDirtyPages)
2002 return;
2003
2004 Log(("pgmPoolResetDirtyPages\n"));
2005 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2006 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2007
2008 pPool->idxFreeDirtyPage = 0;
2009 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2010 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2011 {
2012 unsigned i;
2013 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2014 {
2015 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2016 {
2017 pPool->idxFreeDirtyPage = i;
2018 break;
2019 }
2020 }
2021 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2022 }
2023
2024 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2025 return;
2026}
2027
2028
2029/**
2030 * Invalidate the PT entry for the specified page
2031 *
2032 * @param pVM The cross context VM structure.
2033 * @param GCPtrPage Guest page to invalidate
2034 */
2035void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2036{
2037 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2038 PGM_LOCK_ASSERT_OWNER(pVM);
2039 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2040
2041 if (!pPool->cDirtyPages)
2042 return;
2043
2044 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2045 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2046 {
2047 /** @todo What was intended here??? This looks incomplete... */
2048 }
2049}
2050
2051
2052/**
2053 * Flush any dirty page tracking entry for the specified guest page table.
2054 *
2055 * @param pVM The cross context VM structure.
2056 * @param GCPhysPT Physical address of the page table
2057 */
2058void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2059{
2060 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2061 PGM_LOCK_ASSERT_OWNER(pVM);
2062 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2063 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2064
2065 if (!pPool->cDirtyPages)
2066 return;
2067
2068 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2069
2070 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2071 {
2072 unsigned idxPage = pPool->aidxDirtyPages[i];
2073 if (idxPage != NIL_PGMPOOL_IDX)
2074 {
2075 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2076 if (pPage->GCPhys == GCPhysPT)
2077 {
2078 idxDirtyPage = i;
2079 break;
2080 }
2081 }
2082 }
2083
2084 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2085 {
2086 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2087 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2088 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2089 {
2090 unsigned i;
2091 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2092 {
2093 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2094 {
2095 pPool->idxFreeDirtyPage = i;
2096 break;
2097 }
2098 }
2099 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2100 }
2101 }
2102}
2103
2104#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2105
2106/**
2107 * Inserts a page into the GCPhys hash table.
2108 *
2109 * @param pPool The pool.
2110 * @param pPage The page.
2111 */
2112DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2113{
2114 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2115 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2116 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2117 pPage->iNext = pPool->aiHash[iHash];
2118 pPool->aiHash[iHash] = pPage->idx;
2119}
2120
2121
2122/**
2123 * Removes a page from the GCPhys hash table.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The page.
2127 */
2128DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2131 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2132 if (pPool->aiHash[iHash] == pPage->idx)
2133 pPool->aiHash[iHash] = pPage->iNext;
2134 else
2135 {
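        /* Not the chain head: walk the hash chain to find the predecessor and unlink the page. */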
2136 uint16_t iPrev = pPool->aiHash[iHash];
2137 for (;;)
2138 {
2139 const int16_t i = pPool->aPages[iPrev].iNext;
2140 if (i == pPage->idx)
2141 {
2142 pPool->aPages[iPrev].iNext = pPage->iNext;
2143 break;
2144 }
2145 if (i == NIL_PGMPOOL_IDX)
2146 {
2147 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2148 break;
2149 }
2150 iPrev = i;
2151 }
2152 }
2153 pPage->iNext = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Frees up one cache page.
2159 *
2160 * @returns VBox status code.
2161 * @retval VINF_SUCCESS on success.
2162 * @param pPool The pool.
2163 * @param iUser The user index.
2164 */
2165static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2166{
2167 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2168 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2169 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2170
2171 /*
2172 * Select one page from the tail of the age list.
2173 */
2174 PPGMPOOLPAGE pPage;
2175 for (unsigned iLoop = 0; ; iLoop++)
2176 {
2177 uint16_t iToFree = pPool->iAgeTail;
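        /* Never evict the page acting as the caller's user table; take the next older
           page on the age list instead. */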
2178 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2179 iToFree = pPool->aPages[iToFree].iAgePrev;
2180/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2181 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2182 {
2183 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2184 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2185 {
2186 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2187 continue;
2188 iToFree = i;
2189 break;
2190 }
2191 }
2192*/
2193 Assert(iToFree != iUser);
2194 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2195 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2196 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2197 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2198 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2199 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2200
2201 pPage = &pPool->aPages[iToFree];
2202
2203 /*
2204 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2205 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2206 */
2207 if ( !pgmPoolIsPageLocked(pPage)
2208 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2209 break;
2210 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2211 pgmPoolCacheUsed(pPool, pPage);
2212 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2213 }
2214
2215 /*
2216 * Found a usable page, flush it and return.
2217 */
2218 int rc = pgmPoolFlushPage(pPool, pPage);
2219 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2220 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2221 if (rc == VINF_SUCCESS)
2222 PGM_INVL_ALL_VCPU_TLBS(pVM);
2223 return rc;
2224}
2225
2226
2227/**
2228 * Checks if a kind mismatch is really a page being reused
2229 * or if it's just normal remappings.
2230 *
2231 * @returns true if reused and the cached page (enmKind1) should be flushed
2232 * @returns false if not reused.
2233 * @param enmKind1 The kind of the cached page.
2234 * @param enmKind2 The kind of the requested page.
2235 */
2236static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2237{
2238 switch (enmKind1)
2239 {
2240 /*
2241 * Never reuse them. There is no remapping in non-paging mode.
2242 */
2243 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2244 case PGMPOOLKIND_32BIT_PD_PHYS:
2245 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2246 case PGMPOOLKIND_PAE_PD_PHYS:
2247 case PGMPOOLKIND_PAE_PDPT_PHYS:
2248 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2249 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2250 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2251 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2252 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2253 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2254 return false;
2255
2256 /*
2257 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2258 */
2259 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2260 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2262 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2264 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2267 case PGMPOOLKIND_32BIT_PD:
2268 case PGMPOOLKIND_PAE_PDPT:
2269 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2270 switch (enmKind2)
2271 {
2272 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2273 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2274 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2275 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2276 case PGMPOOLKIND_64BIT_PML4:
2277 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2278 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2279 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2280 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2281 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2282 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2285 return true;
2286 default:
2287 return false;
2288 }
2289
2290 /*
2291 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2292 */
2293 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2294 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2295 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2296 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2297 case PGMPOOLKIND_64BIT_PML4:
2298 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2299 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2300 switch (enmKind2)
2301 {
2302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2303 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2304 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2305 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2306 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2307 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2310 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2311 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2312 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2313 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2314 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2315 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2316 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2317 return true;
2318 default:
2319 return false;
2320 }
2321
2322#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2323 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2324 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2325 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2326 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2327
2328 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2329 return false;
2330#endif
2331
2332 /*
2333 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2334 */
2335 case PGMPOOLKIND_ROOT_NESTED:
2336 return false;
2337
2338 default:
2339 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2340 }
2341}
2342
2343
2344/**
2345 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2346 *
2347 * @returns VBox status code.
2348 * @retval VINF_PGM_CACHED_PAGE on success.
2349 * @retval VERR_FILE_NOT_FOUND if not found.
2350 * @param pPool The pool.
2351 * @param GCPhys The GC physical address of the page we're gonna shadow.
2352 * @param enmKind The kind of mapping.
2353 * @param enmAccess Access type for the mapping (only relevant for big pages)
2354 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2355 * @param iUser The shadow page pool index of the user table. This is
2356 * NIL_PGMPOOL_IDX for root pages.
2357 * @param iUserTable The index into the user table (shadowed). Ignored if
2358 * root page
2359 * @param ppPage Where to store the pointer to the page.
2360 */
2361static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2362 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2363{
2364 /*
2365 * Look up the GCPhys in the hash.
2366 */
2367 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2368 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2369 if (i != NIL_PGMPOOL_IDX)
2370 {
2371 do
2372 {
2373 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2374 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2375 if (pPage->GCPhys == GCPhys)
2376 {
2377 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2378 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2379 && pPage->fA20Enabled == fA20Enabled)
2380 {
2381 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2382 * doesn't flush it in case there are no more free use records.
2383 */
2384 pgmPoolCacheUsed(pPool, pPage);
2385
2386 int rc = VINF_SUCCESS;
2387 if (iUser != NIL_PGMPOOL_IDX)
2388 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2389 if (RT_SUCCESS(rc))
2390 {
2391 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2392 *ppPage = pPage;
2393 if (pPage->cModifications)
2394 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2395 STAM_COUNTER_INC(&pPool->StatCacheHits);
2396 return VINF_PGM_CACHED_PAGE;
2397 }
2398 return rc;
2399 }
2400
2401 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2402 {
2403 /*
2404 * The kind is different. In some cases we should now flush the page
2405 * as it has been reused, but in most cases this is normal remapping
2406 * of PDs as PT or big pages using the GCPhys field in a slightly
2407 * different way than the other kinds.
2408 */
2409 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2410 {
2411 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2412 pgmPoolFlushPage(pPool, pPage);
2413 break;
2414 }
2415 }
2416 }
2417
2418 /* next */
2419 i = pPage->iNext;
2420 } while (i != NIL_PGMPOOL_IDX);
2421 }
2422
2423 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2424 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2425 return VERR_FILE_NOT_FOUND;
2426}
2427
2428
2429/**
2430 * Inserts a page into the cache.
2431 *
2432 * @param pPool The pool.
2433 * @param pPage The cached page.
2434 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2435 */
2436static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2437{
2438 /*
2439 * Insert into the GCPhys hash if the page is fit for that.
2440 */
2441 Assert(!pPage->fCached);
2442 if (fCanBeCached)
2443 {
2444 pPage->fCached = true;
2445 pgmPoolHashInsert(pPool, pPage);
2446 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2447 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2448 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2449 }
2450 else
2451 {
2452 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2453 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2454 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2455 }
2456
2457 /*
2458 * Insert at the head of the age list.
2459 */
2460 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2461 pPage->iAgeNext = pPool->iAgeHead;
2462 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2463 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2464 else
2465 pPool->iAgeTail = pPage->idx;
2466 pPool->iAgeHead = pPage->idx;
2467}
2468
2469
2470/**
2471 * Flushes a cached page.
2472 *
2473 * @param pPool The pool.
2474 * @param pPage The cached page.
2475 */
2476static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2477{
2478 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2479
2480 /*
2481 * Remove the page from the hash.
2482 */
2483 if (pPage->fCached)
2484 {
2485 pPage->fCached = false;
2486 pgmPoolHashRemove(pPool, pPage);
2487 }
2488 else
2489 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2490
2491 /*
2492 * Remove it from the age list.
2493 */
2494 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2495 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2496 else
2497 pPool->iAgeTail = pPage->iAgePrev;
2498 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2499 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2500 else
2501 pPool->iAgeHead = pPage->iAgeNext;
2502 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2503 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2504}
2505
2506
2507/**
2508 * Looks for pages sharing the monitor.
2509 *
2510 * @returns Pointer to the head page.
2511 * @returns NULL if not found.
2512 * @param pPool The Pool
2513 * @param pNewPage The page which is going to be monitored.
2514 */
2515static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2516{
2517 /*
2518 * Look up the GCPhys in the hash.
2519 */
2520 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2521 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2522 if (i == NIL_PGMPOOL_IDX)
2523 return NULL;
2524 do
2525 {
2526 PPGMPOOLPAGE pPage = &pPool->aPages[i];
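        /* Match on guest page granularity; pPage->GCPhys may carry a sub-page offset for
           kinds that shadow only part of a guest page (e.g. PAE PDs for a 32-bit PD). */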
2527 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2528 && pPage != pNewPage)
2529 {
2530 switch (pPage->enmKind)
2531 {
2532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2533 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2534 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2535 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2536 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2538 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2540 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2542 case PGMPOOLKIND_64BIT_PML4:
2543 case PGMPOOLKIND_32BIT_PD:
2544 case PGMPOOLKIND_PAE_PDPT:
2545#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2546 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2547 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2548 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2549#endif
2550 {
2551 /* find the head */
2552 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2553 {
2554 Assert(pPage->iMonitoredPrev != pPage->idx);
2555 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2556 }
2557 return pPage;
2558 }
2559
2560 /* ignore, no monitoring. */
2561 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2564 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2565 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2566 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2567 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2568 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2569 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2570 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2571 case PGMPOOLKIND_ROOT_NESTED:
2572 case PGMPOOLKIND_PAE_PD_PHYS:
2573 case PGMPOOLKIND_PAE_PDPT_PHYS:
2574 case PGMPOOLKIND_32BIT_PD_PHYS:
2575 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2576#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2577 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2578#endif
2579 break;
2580 default:
2581 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2582 }
2583 }
2584
2585 /* next */
2586 i = pPage->iNext;
2587 } while (i != NIL_PGMPOOL_IDX);
2588 return NULL;
2589}
2590
2591
2592/**
2593 * Enables write monitoring of a guest page.
2594 *
2595 * @returns VBox status code.
2596 * @retval VINF_SUCCESS on success.
2597 * @param pPool The pool.
2598 * @param pPage The cached page.
2599 */
2600static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2601{
2602 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2603
2604 /*
2605 * Filter out the relevant kinds.
2606 */
2607 switch (pPage->enmKind)
2608 {
2609 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2610 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2611 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2612 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2613 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2615 case PGMPOOLKIND_64BIT_PML4:
2616 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2617 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2618 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2619 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2620 case PGMPOOLKIND_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PDPT:
2622 break;
2623
2624 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2625 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2626 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2627 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2628 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2629 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2630 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2631 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2632 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2633 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2634 case PGMPOOLKIND_ROOT_NESTED:
2635 /* Nothing to monitor here. */
2636 return VINF_SUCCESS;
2637
2638 case PGMPOOLKIND_32BIT_PD_PHYS:
2639 case PGMPOOLKIND_PAE_PDPT_PHYS:
2640 case PGMPOOLKIND_PAE_PD_PHYS:
2641 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2642 /* Nothing to monitor here. */
2643 return VINF_SUCCESS;
2644
2645#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2646 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2647 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2648 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2649 break;
2650
2651 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2652 /* Nothing to monitor here. */
2653 return VINF_SUCCESS;
2654#endif
2655
2656 default:
2657 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2658 }
2659
2660 /*
2661 * Install handler.
2662 */
2663 int rc;
2664 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2665 if (pPageHead)
2666 {
2667 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2668 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2669
2670#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2671 if (pPageHead->fDirty)
2672 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2673#endif
2674
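        /* Another pool page already monitors this guest page; just link this page into
           the existing monitoring chain instead of registering a second handler. */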
2675 pPage->iMonitoredPrev = pPageHead->idx;
2676 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2677 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2678 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2679 pPageHead->iMonitoredNext = pPage->idx;
2680 rc = VINF_SUCCESS;
2681 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2682 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2683 }
2684 else
2685 {
2686 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2687 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2688
2689 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2690 PVMCC pVM = pPool->CTX_SUFF(pVM);
2691 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2692 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2693 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2694 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2695 * the heap size should suffice. */
2696 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2697 PVMCPU pVCpu = VMMGetCpu(pVM);
2698 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2699 }
2700 pPage->fMonitored = true;
2701 return rc;
2702}
2703
2704
2705/**
2706 * Disables write monitoring of a guest page.
2707 *
2708 * @returns VBox status code.
2709 * @retval VINF_SUCCESS on success.
2710 * @param pPool The pool.
2711 * @param pPage The cached page.
2712 */
2713static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2714{
2715 /*
2716 * Filter out the relevant kinds.
2717 */
2718 switch (pPage->enmKind)
2719 {
2720 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2722 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2723 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2724 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2725 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2726 case PGMPOOLKIND_64BIT_PML4:
2727 case PGMPOOLKIND_32BIT_PD:
2728 case PGMPOOLKIND_PAE_PDPT:
2729 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2730 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2731 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2733 break;
2734
2735 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2736 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2737 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2738 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2739 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2740 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2741 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2742 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2745 case PGMPOOLKIND_ROOT_NESTED:
2746 case PGMPOOLKIND_PAE_PD_PHYS:
2747 case PGMPOOLKIND_PAE_PDPT_PHYS:
2748 case PGMPOOLKIND_32BIT_PD_PHYS:
2749 /* Nothing to monitor here. */
2750 Assert(!pPage->fMonitored);
2751 return VINF_SUCCESS;
2752
2753#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2754 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2755 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2756 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2757 break;
2758
2759 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2760 /* Nothing to monitor here. */
2761 Assert(!pPage->fMonitored);
2762 return VINF_SUCCESS;
2763#endif
2764
2765 default:
2766 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2767 }
2768 Assert(pPage->fMonitored);
2769
2770 /*
2771 * Remove the page from the monitored list or uninstall it if last.
2772 */
2773 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2774 int rc;
2775 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2776 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2777 {
2778 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2779 {
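            /* This page heads the monitoring chain: promote the next page to head and
               repoint the physical handler's user argument at it. */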
2780 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2781 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2782 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2783
2784 AssertFatalRCSuccess(rc);
2785 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2786 }
2787 else
2788 {
2789 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2790 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2791 {
2792 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2793 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2794 }
2795 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2796 rc = VINF_SUCCESS;
2797 }
2798 }
2799 else
2800 {
2801 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2802 AssertFatalRC(rc);
2803 PVMCPU pVCpu = VMMGetCpu(pVM);
2804 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2805 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2806 }
2807 pPage->fMonitored = false;
2808
2809 /*
2810 * Remove it from the list of modified pages (if in it).
2811 */
2812 pgmPoolMonitorModifiedRemove(pPool, pPage);
2813
2814 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2815 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2816
2817 return rc;
2818}
2819
2820
2821/**
2822 * Inserts the page into the list of modified pages.
2823 *
2824 * @param pPool The pool.
2825 * @param pPage The page.
2826 */
2827void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2828{
2829 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2830 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2831 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2832 && pPool->iModifiedHead != pPage->idx,
2833 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2834 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2835 pPool->iModifiedHead, pPool->cModifiedPages));
2836
2837 pPage->iModifiedNext = pPool->iModifiedHead;
2838 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2839 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2840 pPool->iModifiedHead = pPage->idx;
2841 pPool->cModifiedPages++;
2842#ifdef VBOX_WITH_STATISTICS
2843 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2844 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2845#endif
2846}
2847
2848
2849/**
2850 * Removes the page from the list of modified pages and resets the
2851 * modification counter.
2852 *
2853 * @param pPool The pool.
2854 * @param pPage The page which is believed to be in the list of modified pages.
2855 */
2856static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2857{
2858 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2859 if (pPool->iModifiedHead == pPage->idx)
2860 {
2861 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2862 pPool->iModifiedHead = pPage->iModifiedNext;
2863 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2864 {
2865 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2866 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2867 }
2868 pPool->cModifiedPages--;
2869 }
2870 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2871 {
2872 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2873 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2874 {
2875 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2876 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2877 }
2878 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2879 pPool->cModifiedPages--;
2880 }
2881 else
2882 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2883 pPage->cModifications = 0;
2884}
2885
2886
2887/**
2888 * Zaps the list of modified pages, resetting their modification counters in the process.
2889 *
2890 * @param pVM The cross context VM structure.
2891 */
2892static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2893{
2894 PGM_LOCK_VOID(pVM);
2895 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2896 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2897
2898 unsigned cPages = 0; NOREF(cPages);
2899
2900#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2901 pgmPoolResetDirtyPages(pVM);
2902#endif
2903
2904 uint16_t idx = pPool->iModifiedHead;
2905 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2906 while (idx != NIL_PGMPOOL_IDX)
2907 {
2908 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2909 idx = pPage->iModifiedNext;
2910 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2911 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2912 pPage->cModifications = 0;
2913 Assert(++cPages);
2914 }
2915 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2916 pPool->cModifiedPages = 0;
2917 PGM_UNLOCK(pVM);
2918}
2919
2920
2921/**
2922 * Handle SyncCR3 pool tasks
2923 *
2924 * @returns VBox status code.
2925 * @retval VINF_SUCCESS on success.
2926 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2927 * @param pVCpu The cross context virtual CPU structure.
2928 * @remark Should only be used when monitoring is available, thus placed in
2929 * the PGMPOOL_WITH_MONITORING \#ifdef.
2930 */
2931int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2932{
2933 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2934 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2935
2936 /*
2937 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2938 * Occasionally we will have to clear all the shadow page tables because we wanted
2939 * to monitor a page which was mapped by too many shadowed page tables. This operation
2940     * is sometimes referred to as a 'lightweight flush'.
2941 */
2942# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2943 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2944 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2945# else /* !IN_RING3 */
2946 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2947 {
2948 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2949 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2950
2951 /* Make sure all other VCPUs return to ring 3. */
2952 if (pVM->cCpus > 1)
2953 {
2954 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2955 PGM_INVL_ALL_VCPU_TLBS(pVM);
2956 }
2957 return VINF_PGM_SYNC_CR3;
2958 }
2959# endif /* !IN_RING3 */
2960 else
2961 {
2962 pgmPoolMonitorModifiedClearAll(pVM);
2963
2964 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2965 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2966 {
2967 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2968 return pgmPoolSyncCR3(pVCpu);
2969 }
2970 }
2971 return VINF_SUCCESS;
2972}
2973
2974
2975/**
2976 * Frees up at least one user entry.
2977 *
2978 * @returns VBox status code.
2979 * @retval VINF_SUCCESS if a user entry was successfully freed.
2980 *
2981 * @param pPool The pool.
2982 * @param iUser The user index.
2983 */
2984static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2985{
2986 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2987 /*
2988 * Just free cached pages in a braindead fashion.
2989 */
2990 /** @todo walk the age list backwards and free the first with usage. */
2991 int rc = VINF_SUCCESS;
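    /* Keep freeing cached pages until at least one user record has been returned to
       the free list. */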
2992 do
2993 {
2994 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2995 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2996 rc = rc2;
2997 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2998 return rc;
2999}
3000
3001
3002/**
3003 * Inserts a page into the cache.
3004 *
3005 * This will create user node for the page, insert it into the GCPhys
3006 * hash, and insert it into the age list.
3007 *
3008 * @returns VBox status code.
3009 * @retval VINF_SUCCESS if successfully added.
3010 *
3011 * @param pPool The pool.
3012 * @param pPage The cached page.
3013 * @param GCPhys The GC physical address of the page we're gonna shadow.
3014 * @param iUser The user index.
3015 * @param iUserTable The user table index.
3016 */
3017DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3018{
3019 int rc = VINF_SUCCESS;
3020 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3021
3022 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3023
3024 if (iUser != NIL_PGMPOOL_IDX)
3025 {
3026#ifdef VBOX_STRICT
3027 /*
3028         * Check that the entry doesn't already exist.
3029 */
3030 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3031 {
3032 uint16_t i = pPage->iUserHead;
3033 do
3034 {
3035 Assert(i < pPool->cMaxUsers);
3036 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3037 i = paUsers[i].iNext;
3038 } while (i != NIL_PGMPOOL_USER_INDEX);
3039 }
3040#endif
3041
3042 /*
3043         * Find a free user node.
3044 */
3045 uint16_t i = pPool->iUserFreeHead;
3046 if (i == NIL_PGMPOOL_USER_INDEX)
3047 {
3048 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3049 if (RT_FAILURE(rc))
3050 return rc;
3051 i = pPool->iUserFreeHead;
3052 }
3053
3054 /*
3055 * Unlink the user node from the free list,
3056 * initialize and insert it into the user list.
3057 */
3058 pPool->iUserFreeHead = paUsers[i].iNext;
3059 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3060 paUsers[i].iUser = iUser;
3061 paUsers[i].iUserTable = iUserTable;
3062 pPage->iUserHead = i;
3063 }
3064 else
3065 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3066
3067
3068 /*
3069 * Insert into cache and enable monitoring of the guest page if enabled.
3070 *
3071 * Until we implement caching of all levels, including the CR3 one, we'll
3072     * have to make sure we don't try to monitor & cache any recursive reuse of
3073     * a monitored CR3 page. Because all Windows versions are doing this we'll
3074 * have to be able to do combined access monitoring, CR3 + PT and
3075 * PD + PT (guest PAE).
3076 *
3077 * Update:
3078 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3079 */
3080 const bool fCanBeMonitored = true;
3081 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3082 if (fCanBeMonitored)
3083 {
3084 rc = pgmPoolMonitorInsert(pPool, pPage);
3085 AssertRC(rc);
3086 }
3087 return rc;
3088}
3089
3090
3091/**
3092 * Adds a user reference to a page.
3093 *
3094 * This will move the page to the head of the age list.
3095 *
3096 * @returns VBox status code.
3097 * @retval VINF_SUCCESS if successfully added.
3098 *
3099 * @param pPool The pool.
3100 * @param pPage The cached page.
3101 * @param iUser The user index.
3102 * @param iUserTable The user table.
3103 */
3104static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3105{
3106 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3107 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3108 Assert(iUser != NIL_PGMPOOL_IDX);
3109
3110# ifdef VBOX_STRICT
3111 /*
3112     * Check that the entry doesn't already exist. We only allow multiple
3113 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3114 */
3115 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3116 {
3117 uint16_t i = pPage->iUserHead;
3118 do
3119 {
3120 Assert(i < pPool->cMaxUsers);
3121 /** @todo this assertion looks odd... Shouldn't it be && here? */
3122 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3123 i = paUsers[i].iNext;
3124 } while (i != NIL_PGMPOOL_USER_INDEX);
3125 }
3126# endif
3127
3128 /*
3129 * Allocate a user node.
3130 */
3131 uint16_t i = pPool->iUserFreeHead;
3132 if (i == NIL_PGMPOOL_USER_INDEX)
3133 {
3134 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3135 if (RT_FAILURE(rc))
3136 return rc;
3137 i = pPool->iUserFreeHead;
3138 }
3139 pPool->iUserFreeHead = paUsers[i].iNext;
3140
3141 /*
3142 * Initialize the user node and insert it.
3143 */
3144 paUsers[i].iNext = pPage->iUserHead;
3145 paUsers[i].iUser = iUser;
3146 paUsers[i].iUserTable = iUserTable;
3147 pPage->iUserHead = i;
3148
3149# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3150 if (pPage->fDirty)
3151 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3152# endif
3153
3154 /*
3155 * Tell the cache to update its replacement stats for this page.
3156 */
3157 pgmPoolCacheUsed(pPool, pPage);
3158 return VINF_SUCCESS;
3159}
3160
3161
3162/**
3163 * Frees a user record associated with a page.
3164 *
3165 * This does not clear the entry in the user table; it simply returns the
3166 * user record to the chain of free records.
3167 *
3168 * @param pPool The pool.
3169 * @param pPage The shadow page.
3170 * @param iUser The shadow page pool index of the user table.
3171 * @param iUserTable The index into the user table (shadowed).
3172 *
3173 * @remarks Don't call this for root pages.
3174 */
3175static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3176{
3177 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3178 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3179 Assert(iUser != NIL_PGMPOOL_IDX);
3180
3181 /*
3182 * Unlink and free the specified user entry.
3183 */
3184
3185 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3186 uint16_t i = pPage->iUserHead;
3187 if ( i != NIL_PGMPOOL_USER_INDEX
3188 && paUsers[i].iUser == iUser
3189 && paUsers[i].iUserTable == iUserTable)
3190 {
3191 pPage->iUserHead = paUsers[i].iNext;
3192
3193 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3194 paUsers[i].iNext = pPool->iUserFreeHead;
3195 pPool->iUserFreeHead = i;
3196 return;
3197 }
3198
3199 /* General: Linear search. */
3200 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3201 while (i != NIL_PGMPOOL_USER_INDEX)
3202 {
3203 if ( paUsers[i].iUser == iUser
3204 && paUsers[i].iUserTable == iUserTable)
3205 {
3206 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3207 paUsers[iPrev].iNext = paUsers[i].iNext;
3208 else
3209 pPage->iUserHead = paUsers[i].iNext;
3210
3211 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3212 paUsers[i].iNext = pPool->iUserFreeHead;
3213 pPool->iUserFreeHead = i;
3214 return;
3215 }
3216 iPrev = i;
3217 i = paUsers[i].iNext;
3218 }
3219
3220 /* Fatal: didn't find it */
3221 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3222 iUser, iUserTable, pPage->GCPhys));
3223}
3224
3225
3226#if 0 /* unused */
3227/**
3228 * Gets the entry size of a shadow table.
3229 *
3230 * @param enmKind The kind of page.
3231 *
3232 * @returns The size of the entry in bytes. That is, 4 or 8.
3233 * @returns If the kind is not for a table, an assertion is raised and 0 is
3234 * returned.
3235 */
3236DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3237{
3238 switch (enmKind)
3239 {
3240 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3241 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3243 case PGMPOOLKIND_32BIT_PD:
3244 case PGMPOOLKIND_32BIT_PD_PHYS:
3245 return 4;
3246
3247 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3249 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3250 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3251 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3252 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3253 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3254 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3255 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3256 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3257 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3258 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3259 case PGMPOOLKIND_64BIT_PML4:
3260 case PGMPOOLKIND_PAE_PDPT:
3261 case PGMPOOLKIND_ROOT_NESTED:
3262 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3263 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3264 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3265 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3266 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3267 case PGMPOOLKIND_PAE_PD_PHYS:
3268 case PGMPOOLKIND_PAE_PDPT_PHYS:
3269 return 8;
3270
3271 default:
3272 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3273 }
3274}
3275#endif /* unused */
3276
3277#if 0 /* unused */
3278/**
3279 * Gets the entry size of a guest table.
3280 *
3281 * @param enmKind The kind of page.
3282 *
3283 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3284 * @returns If the kind is not for a table, an assertion is raised and 0 is
3285 * returned.
3286 */
3287DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3288{
3289 switch (enmKind)
3290 {
3291 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3292 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3293 case PGMPOOLKIND_32BIT_PD:
3294 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3300 return 4;
3301
3302 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3303 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3304 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3305 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3306 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3307 case PGMPOOLKIND_64BIT_PML4:
3308 case PGMPOOLKIND_PAE_PDPT:
3309 return 8;
3310
3311 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3312 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3313 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3314 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3315 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3316 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3317 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3318 case PGMPOOLKIND_ROOT_NESTED:
3319 case PGMPOOLKIND_PAE_PD_PHYS:
3320 case PGMPOOLKIND_PAE_PDPT_PHYS:
3321 case PGMPOOLKIND_32BIT_PD_PHYS:
3322 /** @todo can we return 0? (nobody is calling this...) */
3323 AssertFailed();
3324 return 0;
3325
3326 default:
3327 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3328 }
3329}
3330#endif /* unused */
3331
3332
3333/**
3334 * Checks one shadow page table entry for a mapping of a physical page.
3335 *
3336 * @returns true if the relevant PTEs were kept (only updated), false if they were all removed.
3337 *
3338 * @param pVM The cross context VM structure.
3339 * @param pPhysPage The guest page in question.
3340 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3341 * @param iShw The shadow page table.
3342 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3343 */
3344static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3345{
3346 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3347 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3348 bool fRet = false;
3349
3350 /*
3351 * Assert sanity.
3352 */
3353 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3354 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3355 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3356
3357 /*
3358 * Then, clear the actual mappings to the page in the shadow PT.
3359 */
3360 switch (pPage->enmKind)
3361 {
3362 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3364 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3365 {
3366 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3367 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3368 uint32_t u32AndMask = 0;
3369 uint32_t u32OrMask = 0;
3370
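            /* When keeping PTEs is allowed (fFlushPTEs == false), the masks decide below how
               the entry is rewritten: no/disabled handler -> set RW and keep the rest
               (AndMask = all bits set); write handler -> clear only RW.  Any other handler
               state leaves both masks zero, which clears the entry just like a flush. */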
3371 if (!fFlushPTEs)
3372 {
3373 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3374 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3375 {
3376 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3377 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3378 u32OrMask = X86_PTE_RW;
3379 u32AndMask = UINT32_MAX;
3380 fRet = true;
3381 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3382 break;
3383
3384 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3385 u32OrMask = 0;
3386 u32AndMask = ~X86_PTE_RW;
3387 fRet = true;
3388 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3389 break;
3390 default:
3391 /* We will end up here when called with an "ALL" access handler. */
3392 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3393 break;
3394 }
3395 }
3396 else
3397 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3398
3399 /* Update the counter if we're removing references. */
3400 if (!u32AndMask)
3401 {
3402 Assert(pPage->cPresent);
3403 Assert(pPool->cPresent);
3404 pPage->cPresent--;
3405 pPool->cPresent--;
3406 }
3407
3408 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3409 {
3410 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3411 X86PTE Pte;
3412 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3413 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3414 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3415
3416 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3417 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3418 return fRet;
3419 }
3420#ifdef LOG_ENABLED
3421 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3422 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3423 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3424 {
3425 Log(("i=%d cFound=%d\n", i, ++cFound));
3426 }
3427#endif
3428 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3429 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3430 break;
3431 }
3432
3433 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3434 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3435 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3436 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3437 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3438 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3439#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3440 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3441#endif
3442 {
3443 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3444 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3445 uint64_t u64OrMask = 0;
3446 uint64_t u64AndMask = 0;
3447
3448 if (!fFlushPTEs)
3449 {
3450 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3451 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3452 {
3453 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3454 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3455 u64OrMask = X86_PTE_RW;
3456 u64AndMask = UINT64_MAX;
3457 fRet = true;
3458 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3459 break;
3460
3461 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3462 u64OrMask = 0;
3463 u64AndMask = ~(uint64_t)X86_PTE_RW;
3464 fRet = true;
3465 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3466 break;
3467
3468 default:
3469 /* We will end up here when called with an "ALL" access handler. */
3470 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3471 break;
3472 }
3473 }
3474 else
3475 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3476
3477 /* Update the counter if we're removing references. */
3478 if (!u64AndMask)
3479 {
3480 Assert(pPage->cPresent);
3481 Assert(pPool->cPresent);
3482 pPage->cPresent--;
3483 pPool->cPresent--;
3484 }
3485
3486 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3487 {
3488 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3489 X86PTEPAE Pte;
3490 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3491 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3492 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3493
3494 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3495 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3496 return fRet;
3497 }
3498#ifdef LOG_ENABLED
3499 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3500 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3501 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3502 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3503 Log(("i=%d cFound=%d\n", i, ++cFound));
3504#endif
3505 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3506 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3507 break;
3508 }
3509
3510#ifdef PGM_WITH_LARGE_PAGES
3511 /* Large page case only. */
3512 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3513 {
3514 Assert(pVM->pgm.s.fNestedPaging);
3515
3516 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3517 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3518
3519 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3520 {
3521 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3522 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3523 pPD->a[iPte].u = 0;
3524 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3525
3526 /* Update the counter as we're removing references. */
3527 Assert(pPage->cPresent);
3528 Assert(pPool->cPresent);
3529 pPage->cPresent--;
3530 pPool->cPresent--;
3531
3532 return fRet;
3533 }
3534# ifdef LOG_ENABLED
3535 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3536 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3537 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3538 Log(("i=%d cFound=%d\n", i, ++cFound));
3539# endif
3540 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3541 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3542 break;
3543 }
3544
3545 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3546 case PGMPOOLKIND_PAE_PD_PHYS:
3547 {
3548 Assert(pVM->pgm.s.fNestedPaging);
3549
3550 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3551 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3552
3553 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3554 {
3555 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3556 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3557 pPD->a[iPte].u = 0;
3558 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3559
3560 /* Update the counter as we're removing references. */
3561 Assert(pPage->cPresent);
3562 Assert(pPool->cPresent);
3563 pPage->cPresent--;
3564 pPool->cPresent--;
3565 return fRet;
3566 }
3567# ifdef LOG_ENABLED
3568 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3569 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3570 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3571 Log(("i=%d cFound=%d\n", i, ++cFound));
3572# endif
3573 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3574 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3575 break;
3576 }
3577#endif /* PGM_WITH_LARGE_PAGES */
3578
3579 default:
3580 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3581 }
3582
3583 /* not reached. */
3584#ifndef _MSC_VER
3585 return fRet;
3586#endif
3587}
3588
3589
3590/**
3591 * Scans one shadow page table for mappings of a physical page.
3592 *
3593 * @param pVM The cross context VM structure.
3594 * @param pPhysPage The guest page in question.
3595 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3596 * @param iShw The shadow page table.
3597 */
3598static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3599{
3600 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3601
3602    /* We should only come here when there's only one reference to this physical page. */
3603 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3604
3605 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3606 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3607 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3608 if (!fKeptPTEs)
3609 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3610 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3611}
3612
3613
3614/**
3615 * Flushes a list of shadow page tables mapping the same physical page.
3616 *
3617 * @param pVM The cross context VM structure.
3618 * @param pPhysPage The guest page in question.
3619 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3620 * @param iPhysExt The physical cross reference extent list to flush.
3621 */
3622static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3623{
3624 PGM_LOCK_ASSERT_OWNER(pVM);
3625 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3626 bool fKeepList = false;
3627
3628 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3629 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3630
3631 const uint16_t iPhysExtStart = iPhysExt;
3632 PPGMPOOLPHYSEXT pPhysExt;
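    /* Walk every extent in the list and try to flush each referenced shadow PT entry.
       If any entry was merely updated rather than removed, the list and its tracking
       data must be kept. */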
3633 do
3634 {
3635 Assert(iPhysExt < pPool->cMaxPhysExts);
3636 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3637 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3638 {
3639 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3640 {
3641 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3642 if (!fKeptPTEs)
3643 {
3644 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3645 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3646 }
3647 else
3648 fKeepList = true;
3649 }
3650 }
3651 /* next */
3652 iPhysExt = pPhysExt->iNext;
3653 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3654
3655 if (!fKeepList)
3656 {
3657 /* insert the list into the free list and clear the ram range entry. */
3658 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3659 pPool->iPhysExtFreeHead = iPhysExtStart;
3660 /* Invalidate the tracking data. */
3661 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3662 }
3663
3664 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3665}
3666
3667
3668/**
3669 * Flushes all shadow page table mappings of the given guest page.
3670 *
3671 * This is typically called when the host page backing the guest one has been
3672 * replaced or when the page protection was changed due to a guest access
3673 * caught by the monitoring.
3674 *
3675 * @returns VBox status code.
3676 * @retval VINF_SUCCESS if all references have been successfully cleared.
3677 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3678 * pool cleaning. FF and sync flags are set.
3679 *
3680 * @param pVM The cross context VM structure.
3681 * @param GCPhysPage GC physical address of the page in question
3682 * @param pPhysPage The guest page in question.
3683 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3684 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3685 * flushed, it is NOT touched if this isn't necessary.
3686 * The caller MUST initialize this to @a false.
3687 */
3688int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3689{
3690 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3691 PGM_LOCK_VOID(pVM);
3692 int rc = VINF_SUCCESS;
3693
3694#ifdef PGM_WITH_LARGE_PAGES
3695 /* Is this page part of a large page? */
3696 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3697 {
3698 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3699 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3700
3701 /* Fetch the large page base. */
3702 PPGMPAGE pLargePage;
3703 if (GCPhysBase != GCPhysPage)
3704 {
3705 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3706 AssertFatal(pLargePage);
3707 }
3708 else
3709 pLargePage = pPhysPage;
3710
3711 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3712
3713 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3714 {
3715 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3716 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3717 pVM->pgm.s.cLargePagesDisabled++;
3718
3719 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3720 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3721
3722 *pfFlushTLBs = true;
3723 PGM_UNLOCK(pVM);
3724 return rc;
3725 }
3726 }
3727#else
3728 NOREF(GCPhysPage);
3729#endif /* PGM_WITH_LARGE_PAGES */
3730
3731 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3732 if (u16)
3733 {
3734 /*
3735 * The zero page is currently screwing up the tracking and we'll
3736 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3737 * is defined, zero pages won't normally be mapped. Some kind of solution
3738 * will be needed for this problem of course, but it will have to wait...
3739 */
3740 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3741 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3742 rc = VINF_PGM_GCPHYS_ALIASED;
3743 else
3744 {
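            /* The 16-bit tracking word encodes a reference count and an index: a single
               reference points straight at the owning shadow PT, multiple references point
               at a physical cross reference extent list, and the overflow marker forces
               the slow exhaustive scan of the whole pool. */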
3745 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3746 {
3747 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3748 pgmPoolTrackFlushGCPhysPT(pVM,
3749 pPhysPage,
3750 fFlushPTEs,
3751 PGMPOOL_TD_GET_IDX(u16));
3752 }
3753 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3754 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3755 else
3756 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3757 *pfFlushTLBs = true;
3758 }
3759 }
3760
3761 if (rc == VINF_PGM_GCPHYS_ALIASED)
3762 {
3763 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3764 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3765 rc = VINF_PGM_SYNC_CR3;
3766 }
3767 PGM_UNLOCK(pVM);
3768 return rc;
3769}
3770
3771
3772/**
3773 * Scans all shadow page tables for mappings of a physical page.
3774 *
3775 * This may be slow, but it's most likely more efficient than cleaning
3776 * out the entire page pool / cache.
3777 *
3778 * @returns VBox status code.
3779 * @retval VINF_SUCCESS if all references have been successfully cleared.
3780 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3781 * a page pool cleaning.
3782 *
3783 * @param pVM The cross context VM structure.
3784 * @param pPhysPage The guest page in question.
3785 */
3786int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3787{
3788 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3789 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3790 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3791 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3792
3793 /*
3794 * There is a limit to what makes sense.
3795 */
3796 if ( pPool->cPresent > 1024
3797 && pVM->cCpus == 1)
3798 {
3799 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3800 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3801 return VINF_PGM_GCPHYS_ALIASED;
3802 }
3803
3804 /*
3805     * Iterate all the pages until we've encountered all that are in use.
3806     * This is a simple but not quite optimal solution.
3807 */
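    /* Two counters bound the work: cLeft limits how many in-use pool pages we visit,
       and within each page the scan stops once all cPresent present entries have
       been seen. */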
3808 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3809 unsigned cLeft = pPool->cUsedPages;
3810 unsigned iPage = pPool->cCurPages;
3811 while (--iPage >= PGMPOOL_IDX_FIRST)
3812 {
3813 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3814 if ( pPage->GCPhys != NIL_RTGCPHYS
3815 && pPage->cPresent)
3816 {
3817 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3818 switch (pPage->enmKind)
3819 {
3820 /*
3821 * We only care about shadow page tables.
3822 */
3823 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3824 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3825 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3826 {
3827 const uint32_t u32 = (uint32_t)u64;
3828 unsigned cPresent = pPage->cPresent;
3829 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3830 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3831 {
3832 const X86PGUINT uPte = pPT->a[i].u;
3833 if (uPte & X86_PTE_P)
3834 {
3835 if ((uPte & X86_PTE_PG_MASK) == u32)
3836 {
3837 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3838 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3839
3840 /* Update the counter as we're removing references. */
3841 Assert(pPage->cPresent);
3842 Assert(pPool->cPresent);
3843 pPage->cPresent--;
3844 pPool->cPresent--;
3845 }
3846 if (!--cPresent)
3847 break;
3848 }
3849 }
3850 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3851 break;
3852 }
3853
3854 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3855 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3856 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3857 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3858 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3859 {
3860 unsigned cPresent = pPage->cPresent;
3861 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3862 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3863 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3864 {
3865 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3866 {
3867 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3868 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3869
3870 /* Update the counter as we're removing references. */
3871 Assert(pPage->cPresent);
3872 Assert(pPool->cPresent);
3873 pPage->cPresent--;
3874 pPool->cPresent--;
3875 }
3876 if (!--cPresent)
3877 break;
3878 }
3879 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3880 break;
3881 }
3882
3883 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3884 {
3885 unsigned cPresent = pPage->cPresent;
3886 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3887 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3888 {
3889 X86PGPAEUINT const uPte = pPT->a[i].u;
3890 if (uPte & EPT_E_READ)
3891 {
3892 if ((uPte & EPT_PTE_PG_MASK) == u64)
3893 {
3894 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3895 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3896
3897 /* Update the counter as we're removing references. */
3898 Assert(pPage->cPresent);
3899 Assert(pPool->cPresent);
3900 pPage->cPresent--;
3901 pPool->cPresent--;
3902 }
3903 if (!--cPresent)
3904 break;
3905 }
3906 }
3907 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3908 break;
3909 }
3910 }
3911
3912 if (!--cLeft)
3913 break;
3914 }
3915 }
3916
3917 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3918 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3919
3920 /*
3921 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3922 */
3923 if (pPool->cPresent > 1024)
3924 {
3925 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3926 return VINF_PGM_GCPHYS_ALIASED;
3927 }
3928
3929 return VINF_SUCCESS;
3930}
3931
3932
3933/**
3934 * Clears the user entry in a user table.
3935 *
3936 * This is used to remove all references to a page when flushing it.
3937 */
3938static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3939{
3940 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3941 Assert(pUser->iUser < pPool->cCurPages);
3942 uint32_t iUserTable = pUser->iUserTable;
3943
3944 /*
3945 * Map the user page. Ignore references made by fictitious pages.
3946 */
3947 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3948 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3949 union
3950 {
3951 uint64_t *pau64;
3952 uint32_t *pau32;
3953 } u;
3954 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3955 {
3956 Assert(!pUserPage->pvPageR3);
3957 return;
3958 }
3959 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3960
3961
3962 /* Safety precaution in case we change the paging for other modes too in the future. */
3963 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3964
3965#ifdef VBOX_STRICT
3966 /*
3967 * Some sanity checks.
3968 */
3969 switch (pUserPage->enmKind)
3970 {
3971 case PGMPOOLKIND_32BIT_PD:
3972 case PGMPOOLKIND_32BIT_PD_PHYS:
3973 Assert(iUserTable < X86_PG_ENTRIES);
3974 break;
3975 case PGMPOOLKIND_PAE_PDPT:
3976 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3977 case PGMPOOLKIND_PAE_PDPT_PHYS:
3978 Assert(iUserTable < 4);
3979 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3980 break;
3981 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3982 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3983 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3984 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3985 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3986 case PGMPOOLKIND_PAE_PD_PHYS:
3987 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3988 break;
3989 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3990 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3991 break;
3992 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3993 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3994 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3995 break;
3996 case PGMPOOLKIND_64BIT_PML4:
3997 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3998 /* GCPhys >> PAGE_SHIFT is the index here */
3999 break;
4000 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4001 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4002 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4003 break;
4004
4005 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4006 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4007 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4008 break;
4009
4010 case PGMPOOLKIND_ROOT_NESTED:
4011 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4012 break;
4013
4014# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4015 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4016 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4017 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4018 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4019 Assert(iUserTable < EPT_PG_ENTRIES);
4020 break;
4021# endif
4022
4023 default:
4024 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4025 break;
4026 }
4027#endif /* VBOX_STRICT */
4028
4029 /*
4030 * Clear the entry in the user page.
4031 */
4032 switch (pUserPage->enmKind)
4033 {
4034 /* 32-bit entries */
4035 case PGMPOOLKIND_32BIT_PD:
4036 case PGMPOOLKIND_32BIT_PD_PHYS:
4037 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4038 break;
4039
4040 /* 64-bit entries */
4041 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4042 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4043 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4044 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4045 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4046 case PGMPOOLKIND_PAE_PD_PHYS:
4047 case PGMPOOLKIND_PAE_PDPT_PHYS:
4048 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4049 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4050 case PGMPOOLKIND_64BIT_PML4:
4051 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4052 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4053 case PGMPOOLKIND_PAE_PDPT:
4054 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4055 case PGMPOOLKIND_ROOT_NESTED:
4056 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4057 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4058# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4059 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4060 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4061 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4062 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4063# endif
4064 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4065 break;
4066
4067 default:
4068 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4069 }
4070 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4071}
4072
4073
4074/**
4075 * Clears all users of a page.
4076 */
4077static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4078{
4079 /*
4080 * Free all the user records.
4081 */
4082 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4083
4084 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4085 uint16_t i = pPage->iUserHead;
4086 while (i != NIL_PGMPOOL_USER_INDEX)
4087 {
4088        /* Clear the entry in the user table. */
4089 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4090
4091 /* Free it. */
4092 const uint16_t iNext = paUsers[i].iNext;
4093 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4094 paUsers[i].iNext = pPool->iUserFreeHead;
4095 pPool->iUserFreeHead = i;
4096
4097 /* Next. */
4098 i = iNext;
4099 }
4100 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4101}
4102
4103
4104/**
4105 * Allocates a new physical cross reference extent.
4106 *
4107 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4108 * @param pVM The cross context VM structure.
4109 * @param piPhysExt Where to store the phys ext index.
4110 */
4111PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4112{
4113 PGM_LOCK_ASSERT_OWNER(pVM);
4114 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4115 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4116 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4117 {
4118 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4119 return NULL;
4120 }
4121 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4122 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4123 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4124 *piPhysExt = iPhysExt;
4125 return pPhysExt;
4126}
4127
4128
4129/**
4130 * Frees a physical cross reference extent.
4131 *
4132 * @param pVM The cross context VM structure.
4133 * @param iPhysExt The extent to free.
4134 */
4135void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4136{
4137 PGM_LOCK_ASSERT_OWNER(pVM);
4138 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4139 Assert(iPhysExt < pPool->cMaxPhysExts);
4140 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4141 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4142 {
4143 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4144 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4145 }
4146 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4147 pPool->iPhysExtFreeHead = iPhysExt;
4148}
4149
4150
4151/**
4152 * Frees a list of physical cross reference extents.
4153 *
4154 * @param pVM The cross context VM structure.
4155 * @param iPhysExt The first extent in the list to free.
4156 */
4157void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4158{
4159 PGM_LOCK_ASSERT_OWNER(pVM);
4160 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4161
4162 const uint16_t iPhysExtStart = iPhysExt;
4163 PPGMPOOLPHYSEXT pPhysExt;
4164 do
4165 {
4166 Assert(iPhysExt < pPool->cMaxPhysExts);
4167 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4168 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4169 {
4170 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4171 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4172 }
4173
4174 /* next */
4175 iPhysExt = pPhysExt->iNext;
4176 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4177
4178 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4179 pPool->iPhysExtFreeHead = iPhysExtStart;
4180}
4181
4182
4183/**
4184 * Insert a reference into a list of physical cross reference extents.
4185 *
4186 * @returns The new tracking data for PGMPAGE.
4187 *
4188 * @param pVM The cross context VM structure.
4189 * @param iPhysExt The physical extent index of the list head.
4190 * @param iShwPT The shadow page table index.
4191 * @param iPte Page table entry
4192 *
4193 */
4194static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4195{
4196 PGM_LOCK_ASSERT_OWNER(pVM);
4197 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4198 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4199
4200 /*
4201 * Special common cases.
4202 */
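    /* A freshly created head extent always has slot 0 occupied, so the common case is
       a free slot at index 1 or 2; anything else falls through to the chain walk below. */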
4203 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4204 {
4205 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4206 paPhysExts[iPhysExt].apte[1] = iPte;
4207 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4208 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4209 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4210 }
4211 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4212 {
4213 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4214 paPhysExts[iPhysExt].apte[2] = iPte;
4215 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4216 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4217 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4218 }
4219 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4220
4221 /*
4222 * General treatment.
4223 */
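    /* Walk the existing chain looking for a free slot.  The walk is capped (cMax), so a
       page shared by very many shadow PTs degrades to the overflowed marker instead of
       growing an unbounded list; in that case the whole chain is returned to the free list. */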
4224 const uint16_t iPhysExtStart = iPhysExt;
4225 unsigned cMax = 15;
4226 for (;;)
4227 {
4228 Assert(iPhysExt < pPool->cMaxPhysExts);
4229 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4230 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4231 {
4232 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4233 paPhysExts[iPhysExt].apte[i] = iPte;
4234 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4235 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4236 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4237 }
4238 if (!--cMax)
4239 {
4240 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4241 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4242 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4243 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4244 }
4245
4246 /* advance */
4247 iPhysExt = paPhysExts[iPhysExt].iNext;
4248 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4249 break;
4250 }
4251
4252 /*
4253 * Add another extent to the list.
4254 */
4255 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4256 if (!pNew)
4257 {
4258 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4259 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4260 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4261 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4262 }
4263 pNew->iNext = iPhysExtStart;
4264 pNew->aidx[0] = iShwPT;
4265 pNew->apte[0] = iPte;
4266 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4267 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4268}
4269
4270
4271/**
4272 * Adds a reference to a guest physical page where extents are in use.
4273 *
4274 * @returns The new tracking data for PGMPAGE.
4275 *
4276 * @param pVM The cross context VM structure.
4277 * @param pPhysPage Pointer to the aPages entry in the ram range.
4278 * @param u16 The ram range flags (top 16-bits).
4279 * @param iShwPT The shadow page table index.
4280 * @param iPte Page table entry
4281 */
4282uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4283{
4284 PGM_LOCK_VOID(pVM);
4285 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4286 {
4287 /*
4288 * Convert to extent list.
4289 */
4290 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4291 uint16_t iPhysExt;
4292 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4293 if (pPhysExt)
4294 {
4295 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4296 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4297 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4298 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4299 pPhysExt->aidx[1] = iShwPT;
4300 pPhysExt->apte[1] = iPte;
4301 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4302 }
4303 else
4304 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4305 }
4306 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4307 {
4308 /*
4309 * Insert into the extent list.
4310 */
4311 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4312 }
4313 else
4314 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4315 PGM_UNLOCK(pVM);
4316 return u16;
4317}
4318
4319
4320/**
4321 * Clear references to guest physical memory.
4322 *
4323 * @param pPool The pool.
4324 * @param pPage The page.
4325 * @param pPhysPage Pointer to the aPages entry in the ram range.
4326 * @param iPte Shadow PTE index
4327 */
4328void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4329{
4330 PVMCC pVM = pPool->CTX_SUFF(pVM);
4331 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4332 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4333
4334 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4335 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4336 {
4337 PGM_LOCK_VOID(pVM);
4338
4339 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4340 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
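        /* Walk the extent chain for the (shadow page, PTE) pair.  Once found, clear the
           slot and, if the extent is then completely empty, unlink and free it, handling
           the lonely, head and middle-of-list cases separately. */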
4341 do
4342 {
4343 Assert(iPhysExt < pPool->cMaxPhysExts);
4344
4345 /*
4346 * Look for the shadow page and check if it's all freed.
4347 */
4348 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4349 {
4350 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4351 && paPhysExts[iPhysExt].apte[i] == iPte)
4352 {
4353 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4354 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4355
4356 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4357 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4358 {
4359 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4360 PGM_UNLOCK(pVM);
4361 return;
4362 }
4363
4364 /* we can free the node. */
4365 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4366 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4367 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4368 {
4369 /* lonely node */
4370 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4371 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4372 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4373 }
4374 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4375 {
4376 /* head */
4377 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4378 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4379 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4380 }
4381 else
4382 {
4383 /* in list */
4384 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4385 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4386 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4387 }
4388 iPhysExt = iPhysExtNext;
4389 PGM_UNLOCK(pVM);
4390 return;
4391 }
4392 }
4393
4394 /* next */
4395 iPhysExtPrev = iPhysExt;
4396 iPhysExt = paPhysExts[iPhysExt].iNext;
4397 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4398
4399 PGM_UNLOCK(pVM);
4400 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4401 }
4402 else /* nothing to do */
4403 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4404}
4405
4406/**
4407 * Clear references to guest physical memory.
4408 *
4409 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4410 * physical address is assumed to be correct, so the linear search can be
4411 * skipped and we can assert at an earlier point.
4412 *
4413 * @param pPool The pool.
4414 * @param pPage The page.
4415 * @param HCPhys The host physical address corresponding to the guest page.
4416 * @param GCPhys The guest physical address corresponding to HCPhys.
4417 * @param iPte Shadow PTE index
4418 */
4419static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4420{
4421 /*
4422 * Lookup the page and check if it checks out before derefing it.
4423 */
4424 PVMCC pVM = pPool->CTX_SUFF(pVM);
4425 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4426 if (pPhysPage)
4427 {
4428 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4429#ifdef LOG_ENABLED
4430 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4431 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4432#endif
4433 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4434 {
4435 Assert(pPage->cPresent);
4436 Assert(pPool->cPresent);
4437 pPage->cPresent--;
4438 pPool->cPresent--;
4439 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4440 return;
4441 }
4442
4443 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4444 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4445 }
4446 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4447}
4448
4449
4450/**
4451 * Clear references to guest physical memory.
4452 *
4453 * @param pPool The pool.
4454 * @param pPage The page.
4455 * @param HCPhys The host physical address corresponding to the guest page.
4456 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4457 * @param iPte Shadow pte index
4458 */
4459void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4460{
4461 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4462
4463 /*
4464 * Try the hint first.
4465 */
4466 RTHCPHYS HCPhysHinted;
4467 PVMCC pVM = pPool->CTX_SUFF(pVM);
4468 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4469 if (pPhysPage)
4470 {
4471 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4472 Assert(HCPhysHinted);
4473 if (HCPhysHinted == HCPhys)
4474 {
4475 Assert(pPage->cPresent);
4476 Assert(pPool->cPresent);
4477 pPage->cPresent--;
4478 pPool->cPresent--;
4479 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4480 return;
4481 }
4482 }
4483 else
4484 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4485
4486 /*
4487 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4488 */
4489 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4490 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4491 while (pRam)
4492 {
4493 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4494 while (iPage-- > 0)
4495 {
4496 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4497 {
4498 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4499 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4500 Assert(pPage->cPresent);
4501 Assert(pPool->cPresent);
4502 pPage->cPresent--;
4503 pPool->cPresent--;
4504 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4505 return;
4506 }
4507 }
4508 pRam = pRam->CTX_SUFF(pNext);
4509 }
4510
4511 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4512}
4513
4514
4515/**
4516 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4517 *
4518 * @param pPool The pool.
4519 * @param pPage The page.
4520 * @param pShwPT The shadow page table (mapping of the page).
4521 * @param pGstPT The guest page table.
4522 */
4523DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4524{
4525 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4526 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4527 {
4528 const X86PGUINT uPte = pShwPT->a[i].u;
4529 Assert(!(uPte & RT_BIT_32(10)));
4530 if (uPte & X86_PTE_P)
4531 {
4532 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4533 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4534 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4535 if (!pPage->cPresent)
4536 break;
4537 }
4538 }
4539}
4540
4541
4542/**
4543 * Clear references to guest physical memory in a PAE / 32-bit page table.
4544 *
4545 * @param pPool The pool.
4546 * @param pPage The page.
4547 * @param pShwPT The shadow page table (mapping of the page).
4548 * @param pGstPT The guest page table (just a half one).
4549 */
4550DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4551{
4552 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4553 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4554 {
4555 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4556 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4557 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4558 {
4559 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4560 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4561 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4562 if (!pPage->cPresent)
4563 break;
4564 }
4565 }
4566}
4567
4568
4569/**
4570 * Clear references to guest physical memory in a PAE / PAE page table.
4571 *
4572 * @param pPool The pool.
4573 * @param pPage The page.
4574 * @param pShwPT The shadow page table (mapping of the page).
4575 * @param pGstPT The guest page table.
4576 */
4577DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4578{
4579 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4580 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4581 {
4582 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4583 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4584 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4585 {
4586            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4587 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4588 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4589 if (!pPage->cPresent)
4590 break;
4591 }
4592 }
4593}
4594
4595
4596/**
4597 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4598 *
4599 * @param pPool The pool.
4600 * @param pPage The page.
4601 * @param pShwPT The shadow page table (mapping of the page).
4602 */
4603DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4604{
4605 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4606 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4607 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4608 {
4609 const X86PGUINT uPte = pShwPT->a[i].u;
4610 Assert(!(uPte & RT_BIT_32(10)));
4611 if (uPte & X86_PTE_P)
4612 {
4613 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4614 i, uPte & X86_PTE_PG_MASK, GCPhys));
4615 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4616 if (!pPage->cPresent)
4617 break;
4618 }
4619 }
4620}
4621
4622
4623/**
4624 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4625 *
4626 * @param pPool The pool.
4627 * @param pPage The page.
4628 * @param pShwPT The shadow page table (mapping of the page).
4629 */
4630DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4631{
4632 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4633 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4634 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4635 {
4636 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4637 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4638 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4639 {
4640 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4641 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4642 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4643 if (!pPage->cPresent)
4644 break;
4645 }
4646 }
4647}
4648
4649
4650/**
4651 * Clear references to shadowed pages in an EPT page table.
4652 *
4653 * @param pPool The pool.
4654 * @param pPage The page.
4655 * @param pShwPT The shadow page table (mapping of the
4656 * page).
4657 */
4658DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4659{
4660 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4661 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4662 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4663 {
4664 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4665 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4666 if (uPte & EPT_E_READ)
4667 {
4668 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4669 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4670 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4671 if (!pPage->cPresent)
4672 break;
4673 }
4674 }
4675}
4676
4677
4678#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4679/**
4680 * Clears references to shadowed pages in a SLAT EPT page table.
4681 *
4682 * @param pPool The pool.
4683 * @param pPage The page.
4684 * @param pShwPT The shadow page table (mapping of the page).
4685 * @param pGstPT The guest page table.
4686 */
4687DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4688{
4689 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4690 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4691 {
4692 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4693 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4694 if (uShwPte & EPT_PRESENT_MASK)
4695 {
4696 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4697 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4698 if (!pPage->cPresent)
4699 break;
4700 }
4701 }
4702}
4703#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4704
4705
4706/**
4707 * Clear references to shadowed pages in a 32-bit page directory.
4708 *
4709 * @param pPool The pool.
4710 * @param pPage The page.
4711 * @param pShwPD The shadow page directory (mapping of the page).
4712 */
4713DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4714{
4715 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4716 {
4717 X86PGUINT const uPde = pShwPD->a[i].u;
4718 if (uPde & X86_PDE_P)
4719 {
4720 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4721 if (pSubPage)
4722 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4723 else
4724 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4725 }
4726 }
4727}
4728
4729
4730/**
4731 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4732 *
4733 * @param pPool The pool.
4734 * @param pPage The page.
4735 * @param pShwPD The shadow page directory (mapping of the page).
4736 */
4737DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4738{
4739 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4740 {
4741 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4742 if (uPde & X86_PDE_P)
4743 {
4744#ifdef PGM_WITH_LARGE_PAGES
4745 if (uPde & X86_PDE_PS)
4746 {
4747 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4748 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4749 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4750 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4751 i);
4752 }
4753 else
4754#endif
4755 {
4756 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4757 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4758 if (pSubPage)
4759 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4760 else
4761 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4762 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4763 }
4764 }
4765 }
4766}
4767
4768
4769/**
4770 * Clear references to shadowed pages in a PAE page directory pointer table.
4771 *
4772 * @param pPool The pool.
4773 * @param pPage The page.
4774 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4775 */
4776DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4777{
4778 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4779 {
4780 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4781 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4782 if (uPdpe & X86_PDPE_P)
4783 {
4784 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4785 if (pSubPage)
4786 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4787 else
4788 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4789 }
4790 }
4791}
4792
4793
4794/**
4795 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4796 *
4797 * @param pPool The pool.
4798 * @param pPage The page.
4799 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4800 */
4801DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4802{
4803 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4804 {
4805 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4806 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4807 if (uPdpe & X86_PDPE_P)
4808 {
4809 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4810 if (pSubPage)
4811 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4812 else
4813 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4814 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4815 }
4816 }
4817}
4818
4819
4820/**
4821 * Clear references to shadowed pages in a 64-bit level 4 page table.
4822 *
4823 * @param pPool The pool.
4824 * @param pPage The page.
4825 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4826 */
4827DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4828{
4829 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4830 {
4831 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4832 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4833 if (uPml4e & X86_PML4E_P)
4834 {
4835 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4836 if (pSubPage)
4837 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4838 else
4839 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4840 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4841 }
4842 }
4843}
4844
4845
4846/**
4847 * Clear references to shadowed pages in an EPT page directory.
4848 *
4849 * @param pPool The pool.
4850 * @param pPage The page.
4851 * @param pShwPD The shadow page directory (mapping of the page).
4852 */
4853DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4854{
4855 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4856 {
4857 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4858#ifdef PGM_WITH_LARGE_PAGES
4859 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4860#else
4861 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4862#endif
4863 if (uPde & EPT_E_READ)
4864 {
4865#ifdef PGM_WITH_LARGE_PAGES
4866 if (uPde & EPT_E_LEAF)
4867 {
4868 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4869 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4870 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* We don't support large guest EPT yet. */
4871 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4872 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4873 i);
4874 }
4875 else
4876#endif
4877 {
4878 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4879 if (pSubPage)
4880 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4881 else
4882 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4883 }
4884 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4885 }
4886 }
4887}
4888
4889
4890/**
4891 * Clear references to shadowed pages in an EPT page directory pointer table.
4892 *
4893 * @param pPool The pool.
4894 * @param pPage The page.
4895 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4896 */
4897DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4898{
4899 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4900 {
4901 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4902 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4903 if (uPdpe & EPT_E_READ)
4904 {
4905 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4906 if (pSubPage)
4907 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4908 else
4909 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4910 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4911 }
4912 }
4913}
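/*
 * Editorial note (added, not part of the original source): the pgmPoolTrackDeref*
 * walkers above all follow the same pattern - iterate the shadow table and, for
 * every present entry, either drop the GC physical tracking reference for leaf
 * entries and large pages (pgmPoolTracDerefGCPhys) or drop the user record of the
 * sub-page the entry points to (RTAvloHCPhysGet lookup in pPool->HCPhysTree
 * followed by pgmPoolTrackFreeUser).
 */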
4914
4915
4916/**
4917 * Clears all references made by this page.
4918 *
4919 * This includes other shadow pages and GC physical addresses.
4920 *
4921 * @param pPool The pool.
4922 * @param pPage The page.
4923 */
4924static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4925{
4926 /*
4927 * Map the shadow page and take action according to the page kind.
4928 */
4929 PVMCC pVM = pPool->CTX_SUFF(pVM);
4930 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4931 switch (pPage->enmKind)
4932 {
4933 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4934 {
4935 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4936 void *pvGst;
4937 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4938 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4939 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4940 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4941 break;
4942 }
4943
4944 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4945 {
4946 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4947 void *pvGst;
4948 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4949 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4950 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4951 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4952 break;
4953 }
4954
4955 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4956 {
4957 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4958 void *pvGst;
4959 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4960 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4961 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4962 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4963 break;
4964 }
4965
4966 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4967 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4968 {
4969 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4970 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4971 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4972 break;
4973 }
4974
4975 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4976 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4978 {
4979 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4980 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4981 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4982 break;
4983 }
4984
4985 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4986 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4987 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4988 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4989 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4990 case PGMPOOLKIND_PAE_PD_PHYS:
4991 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4992 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4993 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4994 break;
4995
4996 case PGMPOOLKIND_32BIT_PD_PHYS:
4997 case PGMPOOLKIND_32BIT_PD:
4998 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4999 break;
5000
5001 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5002 case PGMPOOLKIND_PAE_PDPT:
5003 case PGMPOOLKIND_PAE_PDPT_PHYS:
5004 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5005 break;
5006
5007 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5008 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5009 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5010 break;
5011
5012 case PGMPOOLKIND_64BIT_PML4:
5013 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5014 break;
5015
5016 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5017 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5018 break;
5019
5020 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5021 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5022 break;
5023
5024 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5025 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5026 break;
5027
5028#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5029 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5030 {
5031 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5032 void *pvGst;
5033 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5034 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5035 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5036 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5037 break;
5038 }
5039
5040 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5041 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5042 break;
5043
5044 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5045 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5046 break;
5047#endif
5048
5049 default:
5050 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5051 }
5052
5053 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
5054 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5055 ASMMemZeroPage(pvShw);
5056 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5057 pPage->fZeroed = true;
5058 Assert(!pPage->cPresent);
5059 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5060}
5061
5062
5063/**
5064 * Flushes a pool page.
5065 *
5066 * This moves the page to the free list after removing all user references to it.
5067 *
5068 * @returns VBox status code.
5069 * @retval VINF_SUCCESS on success.
5070 * @param pPool The pool.
5071 * @param pPage The shadow page.
5072 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
5073 */
5074int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5075{
5076 PVMCC pVM = pPool->CTX_SUFF(pVM);
5077 bool fFlushRequired = false;
5078
5079 int rc = VINF_SUCCESS;
5080 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5081 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5082 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5083
5084 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5085 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5086 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5087
5088 /*
5089 * Reject any attempts at flushing any of the special root pages (shall
5090 * not happen).
5091 */
5092 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5093 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5094 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5095 VINF_SUCCESS);
5096
5097 PGM_LOCK_VOID(pVM);
5098
5099 /*
5100 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5101 */
5102 if (pgmPoolIsPageLocked(pPage))
5103 {
5104 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5105 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5106 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5107 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5108 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5109 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5110 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5111 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5112 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5113 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5114 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5115 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5116 PGM_UNLOCK(pVM);
5117 return VINF_SUCCESS;
5118 }
5119
5120 /*
5121 * Mark the page as being in need of an ASMMemZeroPage().
5122 */
5123 pPage->fZeroed = false;
5124
5125#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5126 if (pPage->fDirty)
5127 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5128#endif
5129
5130 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
5131 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5132 fFlushRequired = true;
5133
5134 /*
5135 * Clear the page.
5136 */
5137 pgmPoolTrackClearPageUsers(pPool, pPage);
5138 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5139 pgmPoolTrackDeref(pPool, pPage);
5140 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5141
5142 /*
5143 * Flush it from the cache.
5144 */
5145 pgmPoolCacheFlushPage(pPool, pPage);
5146
5147 /*
5148 * Deregister the monitoring.
5149 */
5150 if (pPage->fMonitored)
5151 rc = pgmPoolMonitorFlush(pPool, pPage);
5152
5153 /*
5154 * Free the page.
5155 */
5156 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5157 pPage->iNext = pPool->iFreeHead;
5158 pPool->iFreeHead = pPage->idx;
5159 pPage->enmKind = PGMPOOLKIND_FREE;
5160 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5161 pPage->GCPhys = NIL_RTGCPHYS;
5162 pPage->fReusedFlushPending = false;
5163
5164 pPool->cUsedPages--;
5165
5166 /* Flush the TLBs of all VCPUs if required. */
5167 if ( fFlushRequired
5168 && fFlush)
5169 {
5170 PGM_INVL_ALL_VCPU_TLBS(pVM);
5171 }
5172
5173 PGM_UNLOCK(pVM);
5174 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5175 return rc;
5176}
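/*
 * Editorial sketch (added, not part of the original source): when fFlush is false,
 * pgmPoolFlushPage() skips the all-VCPU TLB flush even if the page had users, so a
 * caller batching several flushes is presumably expected to do the flush once
 * itself.  Hypothetical caller code, shown only to illustrate the contract:
 *
 *     pgmPoolFlushPage(pPool, pPage1, false);   // fFlush=false, no TLB flush yet
 *     pgmPoolFlushPage(pPool, pPage2, false);   // fFlush=false, no TLB flush yet
 *     PGM_INVL_ALL_VCPU_TLBS(pVM);              // one TLB shootdown for the whole batch
 */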
5177
5178
5179/**
5180 * Frees a usage of a pool page.
5181 *
5182 * The caller is responsible for updating the user table so that it no longer
5183 * references the shadow page.
5184 *
5185 * @param pPool The pool.
5186 * @param pPage The shadow page.
5187 * @param iUser The shadow page pool index of the user table.
5188 * NIL_PGMPOOL_IDX for root pages.
5189 * @param iUserTable The index into the user table (shadowed). Ignored if
5190 * root page.
5191 */
5192void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5193{
5194 PVMCC pVM = pPool->CTX_SUFF(pVM);
5195
5196 STAM_PROFILE_START(&pPool->StatFree, a);
5197 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5198 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5199 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5200
5201 PGM_LOCK_VOID(pVM);
5202 if (iUser != NIL_PGMPOOL_IDX)
5203 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5204 if (!pPage->fCached)
5205 pgmPoolFlushPage(pPool, pPage);
5206 PGM_UNLOCK(pVM);
5207 STAM_PROFILE_STOP(&pPool->StatFree, a);
5208}
5209
5210
5211/**
5212 * Makes sure one or more free pages are available.
5213 *
5214 * @returns VBox status code.
5215 * @retval VINF_SUCCESS on success.
5216 *
5217 * @param pPool The pool.
5218 * @param enmKind Page table kind
5219 * @param iUser The user of the page.
5220 */
5221static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5222{
5223 PVMCC pVM = pPool->CTX_SUFF(pVM);
5224 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5225 NOREF(enmKind);
5226
5227 /*
5228 * If the pool isn't fully grown yet, expand it.
5229 */
5230 if (pPool->cCurPages < pPool->cMaxPages)
5231 {
5232 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5233#ifdef IN_RING3
5234 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5235#else
5236 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5237#endif
5238 if (RT_FAILURE(rc))
5239 return rc;
5240 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5241 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5242 return VINF_SUCCESS;
5243 }
5244
5245 /*
5246 * Free one cached page.
5247 */
5248 return pgmPoolCacheFreeOne(pPool, iUser);
5249}
5250
5251
5252/**
5253 * Allocates a page from the pool.
5254 *
5255 * This page may actually be a cached page and not in need of any processing
5256 * on the caller's part.
5257 *
5258 * @returns VBox status code.
5259 * @retval VINF_SUCCESS if a NEW page was allocated.
5260 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5261 *
5262 * @param pVM The cross context VM structure.
5263 * @param GCPhys The GC physical address of the page we're going to shadow.
5264 * For 4MB and 2MB PD entries, it's the first address the
5265 * shadow PT is covering.
5266 * @param enmKind The kind of mapping.
5267 * @param enmAccess Access type for the mapping (only relevant for big pages)
5268 * @param fA20Enabled Whether the A20 gate is enabled or not.
5269 * @param iUser The shadow page pool index of the user table. Root
5270 * pages should pass NIL_PGMPOOL_IDX.
5271 * @param iUserTable The index into the user table (shadowed). Ignored for
5272 * root pages (iUser == NIL_PGMPOOL_IDX).
5273 * @param fLockPage Lock the page
5274 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5275 */
5276int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5277 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5278{
5279 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5280 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5281 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5282 *ppPage = NULL;
5283 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5284 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5285 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5286
5287#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5288 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5289 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5290#endif
5291
5292 PGM_LOCK_VOID(pVM);
5293
5294 if (pPool->fCacheEnabled)
5295 {
5296 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5297 if (RT_SUCCESS(rc2))
5298 {
5299 if (fLockPage)
5300 pgmPoolLockPage(pPool, *ppPage);
5301 PGM_UNLOCK(pVM);
5302 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5303 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5304 return rc2;
5305 }
5306 }
5307
5308 /*
5309 * Allocate a new one.
5310 */
5311 int rc = VINF_SUCCESS;
5312 uint16_t iNew = pPool->iFreeHead;
5313 if (iNew == NIL_PGMPOOL_IDX)
5314 {
5315 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5316 if (RT_FAILURE(rc))
5317 {
5318 PGM_UNLOCK(pVM);
5319 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5320 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5321 return rc;
5322 }
5323 iNew = pPool->iFreeHead;
5324 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5325 }
5326
5327 /* unlink the free head */
5328 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5329 pPool->iFreeHead = pPage->iNext;
5330 pPage->iNext = NIL_PGMPOOL_IDX;
5331
5332 /*
5333 * Initialize it.
5334 */
5335 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5336 pPage->enmKind = enmKind;
5337 pPage->enmAccess = enmAccess;
5338 pPage->GCPhys = GCPhys;
5339 pPage->fA20Enabled = fA20Enabled;
5340 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5341 pPage->fMonitored = false;
5342 pPage->fCached = false;
5343 pPage->fDirty = false;
5344 pPage->fReusedFlushPending = false;
5345 pPage->cModifications = 0;
5346 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5347 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5348 pPage->cPresent = 0;
5349 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5350 pPage->idxDirtyEntry = 0;
5351 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5352 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5353 pPage->cLastAccessHandler = 0;
5354 pPage->cLocked = 0;
5355# ifdef VBOX_STRICT
5356 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5357# endif
5358
5359 /*
5360 * Insert into the tracking and cache. If this fails, free the page.
5361 */
5362 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5363 if (RT_FAILURE(rc3))
5364 {
5365 pPool->cUsedPages--;
5366 pPage->enmKind = PGMPOOLKIND_FREE;
5367 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5368 pPage->GCPhys = NIL_RTGCPHYS;
5369 pPage->iNext = pPool->iFreeHead;
5370 pPool->iFreeHead = pPage->idx;
5371 PGM_UNLOCK(pVM);
5372 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5373 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5374 return rc3;
5375 }
5376
5377 /*
5378 * Commit the allocation, clear the page and return.
5379 */
5380#ifdef VBOX_WITH_STATISTICS
5381 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5382 pPool->cUsedPagesHigh = pPool->cUsedPages;
5383#endif
5384
5385 if (!pPage->fZeroed)
5386 {
5387 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5388 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5389 ASMMemZeroPage(pv);
5390 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5391 }
5392
5393 *ppPage = pPage;
5394 if (fLockPage)
5395 pgmPoolLockPage(pPool, pPage);
5396 PGM_UNLOCK(pVM);
5397 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5398 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5399 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5400 return rc;
5401}
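/*
 * Editorial sketch (added, not part of the original source): typical use of
 * pgmPoolAlloc() and pgmPoolFreeByPage() as suggested by the parameter
 * descriptions above.  The kind, user index and table index values below are
 * hypothetical placeholders.
 *
 *     PPGMPOOLPAGE pShwPage = NULL;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true, iUserPdPage, iPdEntry, false, &pShwPage);
 *     if (RT_SUCCESS(rc))    // VINF_SUCCESS = new page, VINF_PGM_CACHED_PAGE = cache hit
 *     {
 *         // ... point user table entry iPdEntry at pShwPage->Core.Key ...
 *         // Later, when that user table entry is torn down again:
 *         pgmPoolFreeByPage(pPool, pShwPage, iUserPdPage, iPdEntry);
 *     }
 */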
5402
5403
5404/**
5405 * Frees a usage of a pool page.
5406 *
5407 * @param pVM The cross context VM structure.
5408 * @param HCPhys The HC physical address of the shadow page.
5409 * @param iUser The shadow page pool index of the user table.
5410 * NIL_PGMPOOL_IDX if root page.
5411 * @param iUserTable The index into the user table (shadowed). Ignored if
5412 * root page.
5413 */
5414void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5415{
5416 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5417 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5418 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5419}
5420
5421
5422/**
5423 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5424 *
5425 * @returns Pointer to the shadow page structure.
5426 * @param pPool The pool.
5427 * @param HCPhys The HC physical address of the shadow page.
5428 */
5429PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5430{
5431 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5432
5433 /*
5434 * Look up the page.
5435 */
5436 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5437
5438 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5439 return pPage;
5440}
5441
5442
5443/**
5444 * Internal worker for finding a page for debugging purposes, no assertions.
5445 *
5446 * @returns Pointer to the shadow page structure. NULL if not found.
5447 * @param pPool The pool.
5448 * @param HCPhys The HC physical address of the shadow page.
5449 */
5450PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5451{
5452 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5453 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5454}
5455
5456
5457/**
5458 * Internal worker for PGM_HCPHYS_2_PTR.
5459 *
5460 * @returns VBox status code.
5461 * @param pVM The cross context VM structure.
5462 * @param HCPhys The HC physical address of the shadow page.
5463 * @param ppv Where to return the address.
5464 */
5465int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5466{
5467 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5468 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5469 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5470 VERR_PGM_POOL_GET_PAGE_FAILED);
5471 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5472 return VINF_SUCCESS;
5473}
5474
5475#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5476
5477/**
5478 * Flushes the specified page if present.
5479 *
5480 * @param pVM The cross context VM structure.
5481 * @param GCPhys Guest physical address of the page to flush
5482 */
5483void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5484{
5485 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5486
5487 VM_ASSERT_EMT(pVM);
5488
5489 /*
5490 * Look up the GCPhys in the hash.
5491 */
5492 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5493 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5494 if (i == NIL_PGMPOOL_IDX)
5495 return;
5496
5497 do
5498 {
5499 PPGMPOOLPAGE pPage = &pPool->aPages[i];
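        /* Note (added comment): unsigned arithmetic - matches only when pPage->GCPhys lies in [GCPhys, GCPhys + PAGE_SIZE). */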
5500 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5501 {
5502 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5503 switch (pPage->enmKind)
5504 {
5505 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5506 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5507 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5508 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5509 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5510 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5511 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5512 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5513 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5514 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5515 case PGMPOOLKIND_64BIT_PML4:
5516 case PGMPOOLKIND_32BIT_PD:
5517 case PGMPOOLKIND_PAE_PDPT:
5518 {
5519 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5520# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5521 if (pPage->fDirty)
5522 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5523 else
5524# endif
5525 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5526 Assert(!pgmPoolIsPageLocked(pPage));
5527 pgmPoolMonitorChainFlush(pPool, pPage);
5528 return;
5529 }
5530
5531 /* ignore, no monitoring. */
5532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5533 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5534 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5535 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5536 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5537 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5538 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5539 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5540 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5541 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5542 case PGMPOOLKIND_ROOT_NESTED:
5543 case PGMPOOLKIND_PAE_PD_PHYS:
5544 case PGMPOOLKIND_PAE_PDPT_PHYS:
5545 case PGMPOOLKIND_32BIT_PD_PHYS:
5546 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5547 break;
5548
5549 default:
5550 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5551 }
5552 }
5553
5554 /* next */
5555 i = pPage->iNext;
5556 } while (i != NIL_PGMPOOL_IDX);
5557 return;
5558}
5559
5560
5561/**
5562 * Reset CPU on hot plugging.
5563 *
5564 * @param pVM The cross context VM structure.
5565 * @param pVCpu The cross context virtual CPU structure.
5566 */
5567void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5568{
5569 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5570
5571 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5572 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5573 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5574}
5575
5576
5577/**
5578 * Flushes the entire cache.
5579 *
5580 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5581 * this and will execute the CR3 flush.
5582 *
5583 * @param pVM The cross context VM structure.
5584 */
5585void pgmR3PoolReset(PVM pVM)
5586{
5587 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5588
5589 PGM_LOCK_ASSERT_OWNER(pVM);
5590 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5591 LogFlow(("pgmR3PoolReset:\n"));
5592
5593 /*
5594 * If there are no pages in the pool, there is nothing to do.
5595 */
5596 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5597 {
5598 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5599 return;
5600 }
5601
5602 /*
5603 * Exit the shadow mode since we're going to clear everything,
5604 * including the root page.
5605 */
5606 VMCC_FOR_EACH_VMCPU(pVM)
5607 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5608 VMCC_FOR_EACH_VMCPU_END(pVM);
5609
5610
5611 /*
5612 * Nuke the free list and reinsert all pages into it.
5613 */
5614 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5615 {
5616 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5617
5618 if (pPage->fMonitored)
5619 pgmPoolMonitorFlush(pPool, pPage);
5620 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5621 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5622 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5623 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5624 pPage->GCPhys = NIL_RTGCPHYS;
5625 pPage->enmKind = PGMPOOLKIND_FREE;
5626 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5627 Assert(pPage->idx == i);
5628 pPage->iNext = i + 1;
5629 pPage->fA20Enabled = true;
5630 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5631 pPage->fSeenNonGlobal = false;
5632 pPage->fMonitored = false;
5633 pPage->fDirty = false;
5634 pPage->fCached = false;
5635 pPage->fReusedFlushPending = false;
5636 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5637 pPage->cPresent = 0;
5638 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5639 pPage->cModifications = 0;
5640 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5641 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5642 pPage->idxDirtyEntry = 0;
5643 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5644 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5645 pPage->cLastAccessHandler = 0;
5646 pPage->cLocked = 0;
5647# ifdef VBOX_STRICT
5648 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5649# endif
5650 }
5651 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5652 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5653 pPool->cUsedPages = 0;
5654
5655 /*
5656 * Zap and reinitialize the user records.
5657 */
5658 pPool->cPresent = 0;
5659 pPool->iUserFreeHead = 0;
5660 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5661 const unsigned cMaxUsers = pPool->cMaxUsers;
5662 for (unsigned i = 0; i < cMaxUsers; i++)
5663 {
5664 paUsers[i].iNext = i + 1;
5665 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5666 paUsers[i].iUserTable = 0xfffffffe;
5667 }
5668 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5669
5670 /*
5671 * Clear all the GCPhys links and rebuild the phys ext free list.
5672 */
5673 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5674 pRam;
5675 pRam = pRam->CTX_SUFF(pNext))
5676 {
5677 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5678 while (iPage-- > 0)
5679 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5680 }
5681
5682 pPool->iPhysExtFreeHead = 0;
5683 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5684 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5685 for (unsigned i = 0; i < cMaxPhysExts; i++)
5686 {
5687 paPhysExts[i].iNext = i + 1;
5688 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5689 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5690 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5691 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5692 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5693 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5694 }
5695 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5696
5697 /*
5698 * Just zap the modified list.
5699 */
5700 pPool->cModifiedPages = 0;
5701 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5702
5703 /*
5704 * Clear the GCPhys hash and the age list.
5705 */
5706 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5707 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5708 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5709 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5710
5711# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5712 /* Clear all dirty pages. */
5713 pPool->idxFreeDirtyPage = 0;
5714 pPool->cDirtyPages = 0;
5715 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5716 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5717# endif
5718
5719 /*
5720 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5721 */
5722 VMCC_FOR_EACH_VMCPU(pVM)
5723 {
5724 /*
5725 * Re-enter the shadowing mode and assert Sync CR3 FF.
5726 */
5727 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5728 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5729 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5730 }
5731 VMCC_FOR_EACH_VMCPU_END(pVM);
5732
5733 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5734}
5735
5736#endif /* IN_RING3 */
5737
5738#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5739/**
5740 * Stringifies a PGMPOOLKIND value.
5741 */
5742static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5743{
5744 switch ((PGMPOOLKIND)enmKind)
5745 {
5746 case PGMPOOLKIND_INVALID:
5747 return "PGMPOOLKIND_INVALID";
5748 case PGMPOOLKIND_FREE:
5749 return "PGMPOOLKIND_FREE";
5750 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5751 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5752 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5753 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5754 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5755 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5756 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5757 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5758 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5759 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5760 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5761 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5762 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5763 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5764 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5765 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5766 case PGMPOOLKIND_32BIT_PD:
5767 return "PGMPOOLKIND_32BIT_PD";
5768 case PGMPOOLKIND_32BIT_PD_PHYS:
5769 return "PGMPOOLKIND_32BIT_PD_PHYS";
5770 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5771 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5772 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5773 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5774 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5775 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5776 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5777 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5778 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5779 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5780 case PGMPOOLKIND_PAE_PD_PHYS:
5781 return "PGMPOOLKIND_PAE_PD_PHYS";
5782 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5783 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5784 case PGMPOOLKIND_PAE_PDPT:
5785 return "PGMPOOLKIND_PAE_PDPT";
5786 case PGMPOOLKIND_PAE_PDPT_PHYS:
5787 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5788 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5789 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5790 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5791 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5792 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5793 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5794 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5795 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5796 case PGMPOOLKIND_64BIT_PML4:
5797 return "PGMPOOLKIND_64BIT_PML4";
5798 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5799 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5800 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5801 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5802 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5803 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5804 case PGMPOOLKIND_ROOT_NESTED:
5805 return "PGMPOOLKIND_ROOT_NESTED";
5806 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5807 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5808 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5809 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5810 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5811 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5812 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5813 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5814 }
5815 return "Unknown kind!";
5816}
5817#endif /* LOG_ENABLED || VBOX_STRICT */
5818