VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 93830

Last change on this file since 93830 was 93725, checked in by vboxsync, 3 years ago

VMM: More arm64 adjustments. bugref:9898

1/* $Id: PGMAllPool.cpp 93725 2022-02-14 13:46:16Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Internal Functions *
42*********************************************************************************************************************************/
43RT_C_DECLS_BEGIN
44#if 0 /* unused */
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47#endif /* unused */
48static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
49static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
51static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
53static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
54#endif
55#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
56static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
57#endif
58
59int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
60PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
61void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
62void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
63
64RT_C_DECLS_END
65
66
67#if 0 /* unused */
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86#endif /* unused */
87
88
89/**
90 * Flushes a chain of pages sharing the same access monitor.
91 *
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 */
95void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
96{
97 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
98
99 /*
100 * Find the list head.
101 */
102 uint16_t idx = pPage->idx;
103 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
104 {
105 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
106 {
107 idx = pPage->iMonitoredPrev;
108 Assert(idx != pPage->idx);
109 pPage = &pPool->aPages[idx];
110 }
111 }
112
113 /*
114 * Iterate the list flushing each shadow page.
115 */
116 for (;;)
117 {
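/* Editor's note: the next index is read before flushing because flushing the page below will likely unlink it from the monitored chain. */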
118 idx = pPage->iMonitoredNext;
119 Assert(idx != pPage->idx);
120 if (pPage->idx >= PGMPOOL_IDX_FIRST)
121 {
122 int rc2 = pgmPoolFlushPage(pPool, pPage);
123 AssertRC(rc2);
124 }
125 /* next */
126 if (idx == NIL_PGMPOOL_IDX)
127 break;
128 pPage = &pPool->aPages[idx];
129 }
130}
131
132
133/**
134 * Wrapper for reading the guest page table entry that is being modified, using whichever context-specific access is available.
135 *
136 * @returns VBox status code suitable for scheduling.
137 * @param pVM The cross context VM structure.
138 * @param pvDst Destination address
139 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
140 * on the context (e.g. \#PF in R0 & RC).
141 * @param GCPhysSrc The source guest physical address.
142 * @param cb Size of data to read
143 */
144DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
145{
146#if defined(IN_RING3)
147 NOREF(pVM); NOREF(GCPhysSrc);
148 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
149 return VINF_SUCCESS;
150#else
151 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
152 NOREF(pvSrc);
153 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
154#endif
155}
156
157
158/**
159 * Process shadow entries before they are changed by the guest.
160 *
161 * For PT entries we will clear them. For PD entries, we'll simply check
162 * for mapping conflicts and set the SyncCR3 FF if found.
163 *
164 * @param pVCpu The cross context virtual CPU structure.
165 * @param pPool The pool.
166 * @param pPage The head page.
167 * @param GCPhysFault The guest physical fault address.
168 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
169 * depending on the context (e.g. \#PF in R0 & RC).
170 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
171 */
172static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
173 void const *pvAddress, unsigned cbWrite)
174{
175 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
176 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
177 PVMCC pVM = pPool->CTX_SUFF(pVM);
178 NOREF(pVCpu);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
181 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
182
183 for (;;)
184 {
185 union
186 {
187 void *pv;
188 PX86PT pPT;
189 PPGMSHWPTPAE pPTPae;
190 PX86PD pPD;
191 PX86PDPAE pPDPae;
192 PX86PDPT pPDPT;
193 PX86PML4 pPML4;
194 } uShw;
195
196 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
197 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
198
199 uShw.pv = NULL;
200 switch (pPage->enmKind)
201 {
202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
203 {
204 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
205 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
206 const unsigned iShw = off / sizeof(X86PTE);
207 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
208 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
209 if (uPde & X86_PTE_P)
210 {
211 X86PTE GstPte;
212 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
213 AssertRC(rc);
214 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
215 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
216 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
217 }
218 break;
219 }
220
221 /* page/2 sized */
222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
223 {
224 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
225 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
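/* Editor's note: each PAE shadow PT covers only half of the 32-bit guest PT (2 MB of its 4 MB range); the low bits of pPage->GCPhys presumably record which half, so only writes hitting that half are relevant. */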
226 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
227 {
228 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
229 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
230 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
231 {
232 X86PTE GstPte;
233 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
234 AssertRC(rc);
235
236 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
237 pgmPoolTracDerefGCPhysHint(pPool, pPage,
238 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
239 GstPte.u & X86_PTE_PG_MASK,
240 iShw);
241 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
242 }
243 }
244 break;
245 }
246
247 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
248 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
251 {
252 unsigned iGst = off / sizeof(X86PDE);
253 unsigned iShwPdpt = iGst / 256;
254 unsigned iShw = (iGst % 256) * 2;
255 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
256
257 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
258 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
259 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
260 {
261 for (unsigned i = 0; i < 2; i++)
262 {
263 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
264 if (uPde & X86_PDE_P)
265 {
266 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
267 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
268 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
269 }
270
271 /* paranoia / a bit assumptive. */
272 if ( (off & 3)
273 && (off & 3) + cbWrite > 4)
274 {
275 const unsigned iShw2 = iShw + 2 + i;
276 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
277 {
278 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
279 if (uPde2 & X86_PDE_P)
280 {
281 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
282 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
283 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
284 }
285 }
286 }
287 }
288 }
289 break;
290 }
291
292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
293 {
294 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
295 const unsigned iShw = off / sizeof(X86PTEPAE);
296 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
297 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
298 {
299 X86PTEPAE GstPte;
300 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
301 AssertRC(rc);
302
303 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
304 pgmPoolTracDerefGCPhysHint(pPool, pPage,
305 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
306 GstPte.u & X86_PTE_PAE_PG_MASK,
307 iShw);
308 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
309 }
310
311 /* paranoia / a bit assumptive. */
312 if ( (off & 7)
313 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
314 {
315 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
316 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
317
318 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
319 {
320 X86PTEPAE GstPte;
321 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
322 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
323 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
324 AssertRC(rc);
325 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
326 pgmPoolTracDerefGCPhysHint(pPool, pPage,
327 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
328 GstPte.u & X86_PTE_PAE_PG_MASK,
329 iShw2);
330 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
331 }
332 }
333 break;
334 }
335
336 case PGMPOOLKIND_32BIT_PD:
337 {
338 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
339 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
340
341 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
342 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
343 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
344 if (uPde & X86_PDE_P)
345 {
346 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
347 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
348 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
349 }
350
351 /* paranoia / a bit assumptive. */
352 if ( (off & 3)
353 && (off & 3) + cbWrite > sizeof(X86PTE))
354 {
355 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
356 if ( iShw2 != iShw
357 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
358 {
359 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
360 if (uPde2 & X86_PDE_P)
361 {
362 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
363 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
364 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
365 }
366 }
367 }
368#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
369 if ( uShw.pPD->a[iShw].n.u1Present
370 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
371 {
372 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
373 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
374 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
375 }
376#endif
377 break;
378 }
379
380 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
381 {
382 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
383 const unsigned iShw = off / sizeof(X86PDEPAE);
384 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
385
386 /*
387 * Causes trouble when the guest uses a PDE to refer to the whole page table level
388 * structure. (Invalidate here; faults later on when it tries to change the page
389 * table entries -> recheck; probably only applies to the RC case.)
390 */
391 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
392 if (uPde & X86_PDE_P)
393 {
394 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
395 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
396 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
397 }
398
399 /* paranoia / a bit assumptive. */
400 if ( (off & 7)
401 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
402 {
403 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
404 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
405
406 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
407 if (uPde2 & X86_PDE_P)
408 {
409 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
410 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
411 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
412 }
413 }
414 break;
415 }
416
417 case PGMPOOLKIND_PAE_PDPT:
418 {
419 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
420 /*
421 * Hopefully this doesn't happen very often:
422 * - touching unused parts of the page
423 * - messing with the bits of pd pointers without changing the physical address
424 */
425 /* PDPT roots are not page aligned; 32 byte only! */
426 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
427
428 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
429 const unsigned iShw = offPdpt / sizeof(X86PDPE);
430 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
431 {
432 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
433 if (uPdpe & X86_PDPE_P)
434 {
435 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
436 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
438 }
439
440 /* paranoia / a bit assumptive. */
441 if ( (offPdpt & 7)
442 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
443 {
444 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
445 if ( iShw2 != iShw
446 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
447 {
448 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
449 if (uPdpe2 & X86_PDPE_P)
450 {
451 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
452 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
453 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
454 }
455 }
456 }
457 }
458 break;
459 }
460
461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
462 {
463 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
464 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
465 const unsigned iShw = off / sizeof(X86PDEPAE);
466 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
467 if (uPde & X86_PDE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
470 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
471 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
472 }
473
474 /* paranoia / a bit assumptive. */
475 if ( (off & 7)
476 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
477 {
478 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
479 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
480 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
481 if (uPde2 & X86_PDE_P)
482 {
483 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
484 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
485 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
486 }
487 }
488 break;
489 }
490
491 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
492 {
493 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
494 /*
495 * Hopefully this doesn't happen very often:
496 * - messing with the bits of pd pointers without changing the physical address
497 */
498 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
499 const unsigned iShw = off / sizeof(X86PDPE);
500 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
501 if (uPdpe & X86_PDPE_P)
502 {
503 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
504 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
505 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
506 }
507 /* paranoia / a bit assumptive. */
508 if ( (off & 7)
509 && (off & 7) + cbWrite > sizeof(X86PDPE))
510 {
511 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
512 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
513 if (uPdpe2 & X86_PDPE_P)
514 {
515 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
516 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
517 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
518 }
519 }
520 break;
521 }
522
523 case PGMPOOLKIND_64BIT_PML4:
524 {
525 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
526 /*
527 * Hopefully this doesn't happen very often:
528 * - messing with the bits of pd pointers without changing the physical address
529 */
530 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
531 const unsigned iShw = off / sizeof(X86PDPE);
532 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
533 if (uPml4e & X86_PML4E_P)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
536 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
537 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
538 }
539 /* paranoia / a bit assumptive. */
540 if ( (off & 7)
541 && (off & 7) + cbWrite > sizeof(X86PDPE))
542 {
543 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
544 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
545 if (uPml4e2 & X86_PML4E_P)
546 {
547 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
548 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
549 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 default:
556 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
557 }
558 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
559
560 /* next */
561 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
562 return;
563 pPage = &pPool->aPages[pPage->iMonitoredNext];
564 }
565}
566
567#ifndef IN_RING3
568
569/**
570 * Checks if an access could be a fork operation in progress.
571 *
572 * Meaning that the guest is setting up the parent process for Copy-On-Write.
573 *
574 * @returns true if it's likely that we're forking, otherwise false.
575 * @param pPool The pool.
576 * @param pDis The disassembled instruction.
577 * @param offFault The access offset.
578 */
579DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
580{
581 /*
582 * i386 linux is using btr to clear X86_PTE_RW.
583 * The functions involved are (2.6.16 source inspection):
584 * clear_bit
585 * ptep_set_wrprotect
586 * copy_one_pte
587 * copy_pte_range
588 * copy_pmd_range
589 * copy_pud_range
590 * copy_page_range
591 * dup_mmap
592 * dup_mm
593 * copy_mm
594 * copy_process
595 * do_fork
596 */
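/* Editor's note: X86_PTE_RW is bit 1 and thus lives in the low dword of a PAE entry; the !(offFault & 4) requirement below presumably approximates that the btr targets that half. */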
597 if ( pDis->pCurInstr->uOpcode == OP_BTR
598 && !(offFault & 4)
599 /** @todo Validate that the bit index is X86_PTE_RW. */
600 )
601 {
602 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
603 return true;
604 }
605 return false;
606}
607
608
609/**
610 * Determine whether the page is likely to have been reused.
611 *
612 * @returns true if we consider the page as being reused for a different purpose.
613 * @returns false if we consider it to still be a paging page.
614 * @param pVM The cross context VM structure.
615 * @param pVCpu The cross context virtual CPU structure.
616 * @param pRegFrame Trap register frame.
617 * @param pDis The disassembly info for the faulting instruction.
618 * @param pvFault The fault address.
619 * @param pPage The pool page being accessed.
620 *
621 * @remark The REP prefix check is left to the caller because of STOSD/W.
622 */
623DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
624 PPGMPOOLPAGE pPage)
625{
626 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
627 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
628 if (pPage->cLocked)
629 {
630 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused because it's locked!\n", pvFault, pPage));
631 return false;
632 }
633
634 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
635 if ( HMHasPendingIrq(pVM)
636 && pRegFrame->rsp - pvFault < 32)
637 {
638 /* Fault caused by stack writes while trying to inject an interrupt event. */
639 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
640 return true;
641 }
642
643 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
644
645 /* Non-supervisor mode write means it's used for something else. */
646 if (CPUMGetGuestCPL(pVCpu) == 3)
647 return true;
648
649 switch (pDis->pCurInstr->uOpcode)
650 {
651 /* call implies the actual push of the return address faulted */
652 case OP_CALL:
653 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
654 return true;
655 case OP_PUSH:
656 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
657 return true;
658 case OP_PUSHF:
659 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
660 return true;
661 case OP_PUSHA:
662 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
663 return true;
664 case OP_FXSAVE:
665 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
666 return true;
667 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
668 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
669 return true;
670 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
671 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
672 return true;
673 case OP_MOVSWD:
674 case OP_STOSWD:
675 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
676 && pRegFrame->rcx >= 0x40
677 )
678 {
679 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
680
681 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
682 return true;
683 }
684 break;
685
686 default:
687 /*
688 * Anything having ESP on the left side means stack writes.
689 */
690 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
691 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
692 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
693 {
694 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
695 return true;
696 }
697 break;
698 }
699
700 /*
701 * Page table updates are very unlikely to cross page boundaries,
702 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
703 */
704 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
705 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
706 {
707 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
708 return true;
709 }
710
711 /*
712 * Nobody does an unaligned 8-byte write to a page table, right?
713 */
714 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
715 {
716 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
717 return true;
718 }
719
720 return false;
721}
722
723
724/**
725 * Flushes the page being accessed.
726 *
727 * @returns VBox status code suitable for scheduling.
728 * @param pVM The cross context VM structure.
729 * @param pVCpu The cross context virtual CPU structure.
730 * @param pPool The pool.
731 * @param pPage The pool page (head).
732 * @param pDis The disassembly of the write instruction.
733 * @param pRegFrame The trap register frame.
734 * @param GCPhysFault The fault address as guest physical address.
735 * @param pvFault The fault address.
736 * @todo VBOXSTRICTRC
737 */
738static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
739 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
740{
741 NOREF(pVM); NOREF(GCPhysFault);
742
743 /*
744 * First, do the flushing.
745 */
746 pgmPoolMonitorChainFlush(pPool, pPage);
747
748 /*
749 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
750 * Must do this in raw mode (!); XP boot will fail otherwise.
751 */
752 int rc = VINF_SUCCESS;
753 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
754 if (rc2 == VINF_SUCCESS)
755 { /* do nothing */ }
756 else if (rc2 == VINF_EM_RESCHEDULE)
757 {
758 rc = VBOXSTRICTRC_VAL(rc2);
759# ifndef IN_RING3
760 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
761# endif
762 }
763 else if (rc2 == VERR_EM_INTERPRETER)
764 {
765 rc = VINF_EM_RAW_EMULATE_INSTR;
766 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
767 }
768 else if (RT_FAILURE_NP(rc2))
769 rc = VBOXSTRICTRC_VAL(rc2);
770 else
771 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
772
773 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
774 return rc;
775}
776
777
778/**
779 * Handles the STOSD write accesses.
780 *
781 * @returns VBox status code suitable for scheduling.
782 * @param pVM The cross context VM structure.
783 * @param pPool The pool.
784 * @param pPage The pool page (head).
785 * @param pDis The disassembly of the write instruction.
786 * @param pRegFrame The trap register frame.
787 * @param GCPhysFault The fault address as guest physical address.
788 * @param pvFault The fault address.
789 */
790DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
791 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
792{
793 unsigned uIncrement = pDis->Param1.cb;
794 NOREF(pVM);
795
796 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
797 Assert(pRegFrame->rcx <= 0x20);
798
799# ifdef VBOX_STRICT
800 if (pDis->uOpMode == DISCPUMODE_32BIT)
801 Assert(uIncrement == 4);
802 else
803 Assert(uIncrement == 8);
804# endif
805
806 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
807
808 /*
809 * Increment the modification counter and insert it into the list
810 * of modified pages the first time.
811 */
812 if (!pPage->cModifications++)
813 pgmPoolMonitorModifiedInsert(pPool, pPage);
814
815 /*
816 * Execute REP STOSD.
817 *
818 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
819 * write situation, meaning that it's safe to write here.
820 */
821 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
822 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
823 while (pRegFrame->rcx)
824 {
825 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
826 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
827 pu32 += uIncrement;
828 GCPhysFault += uIncrement;
829 pRegFrame->rdi += uIncrement;
830 pRegFrame->rcx--;
831 }
832 pRegFrame->rip += pDis->cbInstr;
833
834 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
835 return VINF_SUCCESS;
836}
837
838
839/**
840 * Handles the simple write accesses.
841 *
842 * @returns VBox status code suitable for scheduling.
843 * @param pVM The cross context VM structure.
844 * @param pVCpu The cross context virtual CPU structure.
845 * @param pPool The pool.
846 * @param pPage The pool page (head).
847 * @param pDis The disassembly of the write instruction.
848 * @param pRegFrame The trap register frame.
849 * @param GCPhysFault The fault address as guest physical address.
850 * @param pvFault The fault address.
851 * @param pfReused Reused state (in/out)
852 */
853DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
854 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
855{
856 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
857 NOREF(pVM);
858 NOREF(pfReused); /* initialized by caller */
859
860 /*
861 * Increment the modification counter and insert it into the list
862 * of modified pages the first time.
863 */
864 if (!pPage->cModifications++)
865 pgmPoolMonitorModifiedInsert(pPool, pPage);
866
867 /*
868 * Clear all the pages. ASSUMES that pvFault is readable.
869 */
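/* Editor's note: the write is fed to the monitor chain in chunks of at most 8 bytes, so each call has to consider no more than two adjacent shadow entries. */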
870 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
871 if (cbWrite <= 8)
872 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
873 else if (cbWrite <= 16)
874 {
875 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
876 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
877 }
878 else
879 {
880 Assert(cbWrite <= 32);
881 for (uint32_t off = 0; off < cbWrite; off += 8)
882 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
883 }
884
885 /*
886 * Interpret the instruction.
887 */
888 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
889 if (RT_SUCCESS(rc))
890 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
891 else if (rc == VERR_EM_INTERPRETER)
892 {
893 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
894 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
895 rc = VINF_EM_RAW_EMULATE_INSTR;
896 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
897 }
898
899# if 0 /* experimental code */
900 if (rc == VINF_SUCCESS)
901 {
902 switch (pPage->enmKind)
903 {
904 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
905 {
906 X86PTEPAE GstPte;
907 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
908 AssertRC(rc);
909
910 /* Check the new value written by the guest. If present and with a bogus physical address, then
911 * it's fairly safe to assume the guest is reusing the PT.
912 */
913 if (GstPte.n.u1Present)
914 {
915 RTHCPHYS HCPhys = -1;
916 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
917 if (rc != VINF_SUCCESS)
918 {
919 *pfReused = true;
920 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
921 }
922 }
923 break;
924 }
925 }
926 }
927# endif
928
929 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
930 return VBOXSTRICTRC_VAL(rc);
931}
932
933
934/**
935 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
936 * \#PF access handler callback for page table pages.}
937 *
938 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
939 */
940DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
941 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
942{
943 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
944 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
945 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
946 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
947 unsigned cMaxModifications;
948 bool fForcedFlush = false;
949 RT_NOREF_PV(uErrorCode);
950
951 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
952
953 PGM_LOCK_VOID(pVM);
954 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
955 {
956 /* Pool page changed while we were waiting for the lock; ignore. */
957 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
958 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
959 PGM_UNLOCK(pVM);
960 return VINF_SUCCESS;
961 }
962# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
963 if (pPage->fDirty)
964 {
965 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
966 PGM_UNLOCK(pVM);
967 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
968 }
969# endif
970
971# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
972 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
973 {
974 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
975 void *pvGst;
976 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
977 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
978 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
979 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
980 }
981# endif
982
983 /*
984 * Disassemble the faulting instruction.
985 */
986 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
987 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
988 if (RT_UNLIKELY(rc != VINF_SUCCESS))
989 {
990 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
991 PGM_UNLOCK(pVM);
992 return rc;
993 }
994
995 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
996
997 /*
998 * We should ALWAYS have the list head as user parameter. This
999 * is because we use that page to record the changes.
1000 */
1001 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1002
1003# ifdef IN_RING0
1004 /* Maximum nr of modifications depends on the page type. */
1005 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1006 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1007 cMaxModifications = 4;
1008 else
1009 cMaxModifications = 24;
1010# else
1011 cMaxModifications = 48;
1012# endif
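/* Editor's note: shadowed page tables get a much lower threshold in ring-0, presumably so the dirty-page optimization further down can take over sooner; directory-level pages tolerate more writes before a forced flush. */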
1013
1014 /*
1015 * Incremental page table updates should weigh more than random ones.
1016 * (Only applies when started from offset 0)
1017 */
1018 pVCpu->pgm.s.cPoolAccessHandler++;
1019 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1020 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1021 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1022 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1023 {
1024 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1025 Assert(pPage->cModifications < 32000);
1026 pPage->cModifications = pPage->cModifications * 2;
1027 pPage->GCPtrLastAccessHandlerFault = pvFault;
1028 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1029 if (pPage->cModifications >= cMaxModifications)
1030 {
1031 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1032 fForcedFlush = true;
1033 }
1034 }
1035
1036 if (pPage->cModifications >= cMaxModifications)
1037 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1038
1039 /*
1040 * Check if it's worth dealing with.
1041 */
1042 bool fReused = false;
1043 bool fNotReusedNotForking = false;
1044 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1045 || pgmPoolIsPageLocked(pPage)
1046 )
1047 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1048 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1049 {
1050 /*
1051 * Simple instructions, no REP prefix.
1052 */
1053 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1054 {
1055 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1056 if (fReused)
1057 goto flushPage;
1058
1059 /* A mov instruction to change the first page table entry will be remembered so we can detect
1060 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1061 */
1062 if ( rc == VINF_SUCCESS
1063 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1064 && pDis->pCurInstr->uOpcode == OP_MOV
1065 && (pvFault & PAGE_OFFSET_MASK) == 0)
1066 {
1067 pPage->GCPtrLastAccessHandlerFault = pvFault;
1068 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1069 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1070 /* Make sure we don't kick out a page too quickly. */
1071 if (pPage->cModifications > 8)
1072 pPage->cModifications = 2;
1073 }
1074 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1075 {
1076 /* ignore the 2nd write to this page table entry. */
1077 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1078 }
1079 else
1080 {
1081 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1082 pPage->GCPtrLastAccessHandlerRip = 0;
1083 }
1084
1085 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1086 PGM_UNLOCK(pVM);
1087 return rc;
1088 }
1089
1090 /*
1091 * Windows frequently does small memset() operations (netio test 4k+).
1092 * We have to deal with these or we'll kill the cache and performance.
1093 */
1094 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1095 && !pRegFrame->eflags.Bits.u1DF
1096 && pDis->uOpMode == pDis->uCpuMode
1097 && pDis->uAddrMode == pDis->uCpuMode)
1098 {
1099 bool fValidStosd = false;
1100
1101 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1102 && pDis->fPrefix == DISPREFIX_REP
1103 && pRegFrame->ecx <= 0x20
1104 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1105 && !((uintptr_t)pvFault & 3)
1106 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1107 )
1108 {
1109 fValidStosd = true;
1110 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1111 }
1112 else
1113 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1114 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1115 && pRegFrame->rcx <= 0x20
1116 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1117 && !((uintptr_t)pvFault & 7)
1118 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1119 )
1120 {
1121 fValidStosd = true;
1122 }
1123
1124 if (fValidStosd)
1125 {
1126 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1128 PGM_UNLOCK(pVM);
1129 return rc;
1130 }
1131 }
1132
1133 /* REP prefix, don't bother. */
1134 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1135 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1136 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1137 fNotReusedNotForking = true;
1138 }
1139
1140# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1141 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1142 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1143 */
1144 if ( pPage->cModifications >= cMaxModifications
1145 && !fForcedFlush
1146 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1147 && ( fNotReusedNotForking
1148 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1149 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1150 )
1151 )
1152 {
1153 Assert(!pgmPoolIsPageLocked(pPage));
1154 Assert(pPage->fDirty == false);
1155
1156 /* Flush any monitored duplicates as we will disable write protection. */
1157 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1158 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1159 {
1160 PPGMPOOLPAGE pPageHead = pPage;
1161
1162 /* Find the monitor head. */
1163 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1164 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1165
1166 while (pPageHead)
1167 {
1168 unsigned idxNext = pPageHead->iMonitoredNext;
1169
1170 if (pPageHead != pPage)
1171 {
1172 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1173 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1174 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1175 AssertRC(rc2);
1176 }
1177
1178 if (idxNext == NIL_PGMPOOL_IDX)
1179 break;
1180
1181 pPageHead = &pPool->aPages[idxNext];
1182 }
1183 }
1184
1185 /* The flushing above might fail for locked pages, so double check. */
1186 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1187 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1188 {
1189 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1190
1191 /* Temporarily allow write access to the page table again. */
1192 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1193 if (rc == VINF_SUCCESS)
1194 {
1195 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1196 AssertMsg(rc == VINF_SUCCESS
1197 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1198 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1199 || rc == VERR_PAGE_NOT_PRESENT,
1200 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1201# ifdef VBOX_STRICT
1202 pPage->GCPtrDirtyFault = pvFault;
1203# endif
1204
1205 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1206 PGM_UNLOCK(pVM);
1207 return rc;
1208 }
1209 }
1210 }
1211# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1212
1213 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1214flushPage:
1215 /*
1216 * Not worth it, so flush it.
1217 *
1218 * If we considered it to be reused, don't go back to ring-3
1219 * to emulate failed instructions since we usually cannot
1220 * interpret them. This may be a bit risky, in which case
1221 * the reuse detection must be fixed.
1222 */
1223 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1224 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1225 && fReused)
1226 {
1227 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1228 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1229 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1230 }
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1232 PGM_UNLOCK(pVM);
1233 return rc;
1234}
1235
1236#endif /* !IN_RING3 */
1237
1238/**
1239 * @callback_method_impl{FNPGMPHYSHANDLER,
1240 * Access handler for shadowed page table pages.}
1241 *
1242 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1243 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1244 */
1245PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1246pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1247 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1248{
1249 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1250 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1251 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1252 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1253 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1254 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1255
1256 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1257
1258 PGM_LOCK_VOID(pVM);
1259
1260#ifdef VBOX_WITH_STATISTICS
1261 /*
1262 * Collect stats on the access.
1263 */
1264 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1265 if (cbBuf <= 16 && cbBuf > 0)
1266 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1267 else if (cbBuf >= 17 && cbBuf < 32)
1268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1269 else if (cbBuf >= 32 && cbBuf < 64)
1270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1271 else if (cbBuf >= 64)
1272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1273
1274 uint8_t cbAlign;
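/* Editor's note: cbAlign is an entry-alignment mask: 3 for page kinds with 4-byte (32-bit) entries, 7 for everything else (8-byte entries). */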
1275 switch (pPage->enmKind)
1276 {
1277 default:
1278 cbAlign = 7;
1279 break;
1280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1281 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1282 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1283 case PGMPOOLKIND_32BIT_PD:
1284 case PGMPOOLKIND_32BIT_PD_PHYS:
1285 cbAlign = 3;
1286 break;
1287 }
1288 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1289 if ((uint8_t)GCPhys & cbAlign)
1290 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1291#endif
1292
1293 /*
1294 * Make sure the pool page wasn't modified by a different CPU.
1295 */
1296 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1297 {
1298 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1299
1300 /* The max modification count before flushing depends on the context and page type. */
1301#ifdef IN_RING3
1302 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1303#else
1304 uint16_t cMaxModifications;
1305 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1306 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1307 cMaxModifications = 4;
1308 else
1309 cMaxModifications = 24;
1310#endif
1311
1312 /*
1313 * We don't have to be very sophisticated about this since there are relatively few calls here.
1314 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1315 */
1316 if ( ( pPage->cModifications < cMaxModifications
1317 || pgmPoolIsPageLocked(pPage) )
1318 && enmOrigin != PGMACCESSORIGIN_DEVICE
1319 && cbBuf <= 16)
1320 {
1321 /* Clear the shadow entry. */
1322 if (!pPage->cModifications++)
1323 pgmPoolMonitorModifiedInsert(pPool, pPage);
1324
1325 if (cbBuf <= 8)
1326 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1327 else
1328 {
1329 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1330 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1331 }
1332 }
1333 else
1334 pgmPoolMonitorChainFlush(pPool, pPage);
1335
1336 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1337 }
1338 else
1339 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1340 PGM_UNLOCK(pVM);
1341 return VINF_PGM_HANDLER_DO_DEFAULT;
1342}
1343
1344
1345#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1346
1347# if defined(VBOX_STRICT) && !defined(IN_RING3)
1348
1349/**
1350 * Check references to guest physical memory in a PAE / PAE page table.
1351 *
1352 * @param pPool The pool.
1353 * @param pPage The page.
1354 * @param pShwPT The shadow page table (mapping of the page).
1355 * @param pGstPT The guest page table.
1356 */
1357static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1358{
1359 unsigned cErrors = 0;
1360 int LastRc = -1; /* initialized to shut up gcc */
1361 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1362 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1363 PVMCC pVM = pPool->CTX_SUFF(pVM);
1364
1365# ifdef VBOX_STRICT
1366 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1367 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1368# endif
1369 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1370 {
1371 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1372 {
1373 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1374 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1375 if ( rc != VINF_SUCCESS
1376 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1377 {
1378 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1379 LastPTE = i;
1380 LastRc = rc;
1381 LastHCPhys = HCPhys;
1382 cErrors++;
1383
1384 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1385 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1386 AssertRC(rc);
1387
1388 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1389 {
1390 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1391
1392 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1393 {
1394 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1395
1396 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1397 {
1398 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1399 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1400 {
1401 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1402 }
1403 }
1404
1405 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1406 }
1407 }
1408 }
1409 }
1410 }
1411 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1412}
1413
1414
1415/**
1416 * Check references to guest physical memory in a PAE / 32-bit page table.
1417 *
1418 * @param pPool The pool.
1419 * @param pPage The page.
1420 * @param pShwPT The shadow page table (mapping of the page).
1421 * @param pGstPT The guest page table.
1422 */
1423static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1424{
1425 unsigned cErrors = 0;
1426 int LastRc = -1; /* initialized to shut up gcc */
1427 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1428 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1429 PVMCC pVM = pPool->CTX_SUFF(pVM);
1430
1431# ifdef VBOX_STRICT
1432 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1433 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1434# endif
1435 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1436 {
1437 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1438 {
1439 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1440 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1441 if ( rc != VINF_SUCCESS
1442 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1443 {
1444 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1445 LastPTE = i;
1446 LastRc = rc;
1447 LastHCPhys = HCPhys;
1448 cErrors++;
1449
1450 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1451 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1452 AssertRC(rc);
1453
1454 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1455 {
1456 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1457
1458 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1459 {
1460 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1461
1462 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1463 {
1464 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1465 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1466 {
1467 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1468 }
1469 }
1470
1471 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1472 }
1473 }
1474 }
1475 }
1476 }
1477 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1478}
1479
1480# endif /* VBOX_STRICT && !IN_RING3 */
1481
1482/**
1483 * Clear references to guest physical memory in a PAE / PAE page table.
1484 *
1485 * @returns nr of changed PTEs
1486 * @param pPool The pool.
1487 * @param pPage The page.
1488 * @param pShwPT The shadow page table (mapping of the page).
1489 * @param pGstPT The guest page table.
1490 * @param pOldGstPT The old cached guest page table.
1491 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1492 * @param pfFlush Flush reused page table (out)
1493 */
1494DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1495 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1496{
1497 unsigned cChanged = 0;
1498
1499# ifdef VBOX_STRICT
1500 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1501 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1502# endif
1503 *pfFlush = false;
1504
1505 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1506 {
1507 /* Check the new value written by the guest. If present and with a bogus physical address, then
1508 * it's fairly safe to assume the guest is reusing the PT.
1509 */
1510 if ( fAllowRemoval
1511 && (pGstPT->a[i].u & X86_PTE_P))
1512 {
1513 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1514 {
1515 *pfFlush = true;
1516 return ++cChanged;
1517 }
1518 }
1519 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1520 {
1521 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1522 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1523 {
1524# ifdef VBOX_STRICT
1525 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1526 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1527 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1528# endif
1529 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1530 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1531 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1532 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1533
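/* Editor's note: the shadow entry may be more restrictive than the guest one (e.g. write-monitored), but never more permissive; hence fHostRW <= fGuestRW. */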
1534 if ( uHostAttr == uGuestAttr
1535 && fHostRW <= fGuestRW)
1536 continue;
1537 }
1538 cChanged++;
1539 /* Something was changed, so flush it. */
1540 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1541 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1542 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1543 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1544 }
1545 }
1546 return cChanged;
1547}
1548
1549
1550/**
1551 * Clear references to guest physical memory in a PAE / 32-bit page table.
1552 *
1553 * @returns nr of changed PTEs
1554 * @param pPool The pool.
1555 * @param pPage The page.
1556 * @param pShwPT The shadow page table (mapping of the page).
1557 * @param pGstPT The guest page table.
1558 * @param pOldGstPT The old cached guest page table.
1559 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1560 * @param pfFlush Flush reused page table (out)
1561 */
1562DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1563 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1564{
1565 unsigned cChanged = 0;
1566
1567# ifdef VBOX_STRICT
1568 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1569 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1570# endif
1571 *pfFlush = false;
1572
1573 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1574 {
1575 /* Check the new value written by the guest. If present and with a bogus physical address, then
1576 * it's fairly safe to assume the guest is reusing the PT. */
1577 if (fAllowRemoval)
1578 {
1579 X86PGUINT const uPte = pGstPT->a[i].u;
1580 if ( (uPte & X86_PTE_P)
1581 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1582 {
1583 *pfFlush = true;
1584 return ++cChanged;
1585 }
1586 }
1587 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1588 {
1589 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1590 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1591 {
1592# ifdef VBOX_STRICT
1593 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1594 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1595 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1596# endif
1597 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1598 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1599 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1600 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1601
1602 if ( uHostAttr == uGuestAttr
1603 && fHostRW <= fGuestRW)
1604 continue;
1605 }
1606 cChanged++;
1607 /* Something was changed, so flush it. */
1608 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1609 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1610 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1611 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1612 }
1613 }
1614 return cChanged;
1615}
1616
1617
1618/**
1619 * Flush a dirty page
1620 *
1621 * @param pVM The cross context VM structure.
1622 * @param pPool The pool.
1623 * @param idxSlot Dirty array slot index
1624 * @param fAllowRemoval Allow a reused page table to be removed
1625 */
1626static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1627{
1628 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1629
1630 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1631 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1632 if (idxPage == NIL_PGMPOOL_IDX)
1633 return;
1634
1635 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1636 Assert(pPage->idx == idxPage);
1637 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1638
1639 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1640 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1641
1642 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1643 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1644 Assert(rc == VINF_SUCCESS);
1645 pPage->fDirty = false;
1646
1647# ifdef VBOX_STRICT
1648 uint64_t fFlags = 0;
1649 RTHCPHYS HCPhys;
1650 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1651 AssertMsg( ( rc == VINF_SUCCESS
1652 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1653 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1654 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1655 || rc == VERR_PAGE_NOT_PRESENT,
1656 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1657# endif
1658
1659 /* Flush those PTEs that have changed. */
1660 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1661 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1662 void *pvGst;
1663 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1664 bool fFlush;
1665 unsigned cChanges;
1666
1667 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1668 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1669 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1670 else
1671 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1672 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1673
1674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1675 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1676 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1677 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1678
1679 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1680 Assert(pPage->cModifications);
1681 if (cChanges < 4)
1682 pPage->cModifications = 1; /* must use > 0 here */
1683 else
1684 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1685
1686 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
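    /* If every slot was in use, the one we just flushed becomes the next free slot. */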
1687 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1688 pPool->idxFreeDirtyPage = idxSlot;
1689
1690 pPool->cDirtyPages--;
1691 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1692 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1693 if (fFlush)
1694 {
1695 Assert(fAllowRemoval);
1696 Log(("Flush reused page table!\n"));
1697 pgmPoolFlushPage(pPool, pPage);
1698 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1699 }
1700 else
1701 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1702}
1703
1704
1705# ifndef IN_RING3
1706/**
1707 * Add a new dirty page
1708 *
1709 * @param pVM The cross context VM structure.
1710 * @param pPool The pool.
1711 * @param pPage The page.
1712 */
1713void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1714{
1715 PGM_LOCK_ASSERT_OWNER(pVM);
1716 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1717 Assert(!pPage->fDirty);
1718
1719 unsigned idxFree = pPool->idxFreeDirtyPage;
1720 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1721 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1722
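    /* All dirty slots are taken: flush the entry occupying the next free slot to make
       room, allowing reused page tables to be removed outright. */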
1723 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1724 {
1725 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1726 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1727 }
1728 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1729 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1730
1731 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1732
1733 /*
1734 * Make a copy of the guest page table as we require valid GCPhys addresses
1735 * when removing references to physical pages.
1736 * (The HCPhys linear lookup is *extremely* expensive!)
1737 */
1738 void *pvGst;
1739 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
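    /* A shadow PAE PT maps 2 MB, so for a 32-bit guest PT only the relevant
       2 KB (512 four-byte entries) need to be copied. */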
1740 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1741 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1742# ifdef VBOX_STRICT
1743 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1744 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1745 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1746 else
1747 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1748 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1749# endif
1750 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1751
1752 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1753 pPage->fDirty = true;
1754 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1755 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1756 pPool->cDirtyPages++;
1757
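    /* Advance the free-slot hint (the slot count is a power of two, see the AssertCompile
       above); if that slot is still occupied, scan the array for any unused slot. */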
1758 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1759 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1760 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1761 {
1762 unsigned i;
1763 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1764 {
1765 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1766 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1767 {
1768 pPool->idxFreeDirtyPage = idxFree;
1769 break;
1770 }
1771 }
1772 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1773 }
1774
1775 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1776
1777 /*
1778 * Clear all references to this shadow table. See @bugref{7298}.
1779 */
1780 pgmPoolTrackClearPageUsers(pPool, pPage);
1781}
1782# endif /* !IN_RING3 */
1783
1784
1785/**
1786 * Check if the specified page is dirty (not write monitored)
1787 *
1788 * @returns true if dirty, false otherwise.
1789 * @param pVM The cross context VM structure.
1790 * @param GCPhys Guest physical address
1791 */
1792bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1793{
1794 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1795 PGM_LOCK_ASSERT_OWNER(pVM);
1796 if (!pPool->cDirtyPages)
1797 return false;
1798
1799 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1800
1801 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1802 {
1803 unsigned idxPage = pPool->aidxDirtyPages[i];
1804 if (idxPage != NIL_PGMPOOL_IDX)
1805 {
1806 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1807 if (pPage->GCPhys == GCPhys)
1808 return true;
1809 }
1810 }
1811 return false;
1812}
1813
1814
1815/**
1816 * Reset all dirty pages by reinstating page monitoring.
1817 *
1818 * @param pVM The cross context VM structure.
1819 */
1820void pgmPoolResetDirtyPages(PVMCC pVM)
1821{
1822 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1823 PGM_LOCK_ASSERT_OWNER(pVM);
1824 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1825
1826 if (!pPool->cDirtyPages)
1827 return;
1828
1829 Log(("pgmPoolResetDirtyPages\n"));
1830 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1831 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1832
1833 pPool->idxFreeDirtyPage = 0;
1834 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1835 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1836 {
1837 unsigned i;
1838 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1839 {
1840 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1841 {
1842 pPool->idxFreeDirtyPage = i;
1843 break;
1844 }
1845 }
1846 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1847 }
1848
1849 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1850 return;
1851}
1852
1853
1854/**
1855 * Invalidate the PT entry for the specified page
1856 *
1857 * @param pVM The cross context VM structure.
1858 * @param GCPtrPage Guest page to invalidate
1859 */
1860void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1861{
1862 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1863 PGM_LOCK_ASSERT_OWNER(pVM);
1864 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1865
1866 if (!pPool->cDirtyPages)
1867 return;
1868
1869 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1870 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1871 {
1872 /** @todo What was intended here??? This looks incomplete... */
1873 }
1874}
1875
1876
1877/**
1878 * Flushes the dirty page tracking entry for the specified guest page table, if any.
1879 *
1880 * @param pVM The cross context VM structure.
1881 * @param GCPhysPT Physical address of the page table
1882 */
1883void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1884{
1885 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1886 PGM_LOCK_ASSERT_OWNER(pVM);
1887 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1888 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1889
1890 if (!pPool->cDirtyPages)
1891 return;
1892
1893 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1894
1895 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1896 {
1897 unsigned idxPage = pPool->aidxDirtyPages[i];
1898 if (idxPage != NIL_PGMPOOL_IDX)
1899 {
1900 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1901 if (pPage->GCPhys == GCPhysPT)
1902 {
1903 idxDirtyPage = i;
1904 break;
1905 }
1906 }
1907 }
1908
1909 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1910 {
1911 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1912 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1913 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1914 {
1915 unsigned i;
1916 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1917 {
1918 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1919 {
1920 pPool->idxFreeDirtyPage = i;
1921 break;
1922 }
1923 }
1924 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1925 }
1926 }
1927}
1928
1929#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1930
1931/**
1932 * Inserts a page into the GCPhys hash table.
1933 *
1934 * @param pPool The pool.
1935 * @param pPage The page.
1936 */
1937DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1938{
1939 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1940 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1941 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1942 pPage->iNext = pPool->aiHash[iHash];
1943 pPool->aiHash[iHash] = pPage->idx;
1944}
1945
1946
1947/**
1948 * Removes a page from the GCPhys hash table.
1949 *
1950 * @param pPool The pool.
1951 * @param pPage The page.
1952 */
1953DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1954{
1955 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1956 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1957 if (pPool->aiHash[iHash] == pPage->idx)
1958 pPool->aiHash[iHash] = pPage->iNext;
1959 else
1960 {
1961 uint16_t iPrev = pPool->aiHash[iHash];
1962 for (;;)
1963 {
1964 const int16_t i = pPool->aPages[iPrev].iNext;
1965 if (i == pPage->idx)
1966 {
1967 pPool->aPages[iPrev].iNext = pPage->iNext;
1968 break;
1969 }
1970 if (i == NIL_PGMPOOL_IDX)
1971 {
1972 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1973 break;
1974 }
1975 iPrev = i;
1976 }
1977 }
1978 pPage->iNext = NIL_PGMPOOL_IDX;
1979}
1980
1981
1982/**
1983 * Frees up one cache page.
1984 *
1985 * @returns VBox status code.
1986 * @retval VINF_SUCCESS on success.
1987 * @param pPool The pool.
1988 * @param iUser The user index.
1989 */
1990static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1991{
1992 const PVMCC pVM = pPool->CTX_SUFF(pVM);
1993 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1994 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1995
1996 /*
1997 * Select one page from the tail of the age list.
1998 */
1999 PPGMPOOLPAGE pPage;
2000 for (unsigned iLoop = 0; ; iLoop++)
2001 {
2002 uint16_t iToFree = pPool->iAgeTail;
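        /* Never free the page the caller is allocating for; fall back to the next oldest entry. */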
2003 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2004 iToFree = pPool->aPages[iToFree].iAgePrev;
2005/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2006 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2007 {
2008 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2009 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2010 {
2011 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2012 continue;
2013 iToFree = i;
2014 break;
2015 }
2016 }
2017*/
2018 Assert(iToFree != iUser);
2019 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2020 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2021 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2022 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2023 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2024 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2025
2026 pPage = &pPool->aPages[iToFree];
2027
2028 /*
2029 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2030 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2031 */
2032 if ( !pgmPoolIsPageLocked(pPage)
2033 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2034 break;
2035 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2036 pgmPoolCacheUsed(pPool, pPage);
2037 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2038 }
2039
2040 /*
2041 * Found a usable page, flush it and return.
2042 */
2043 int rc = pgmPoolFlushPage(pPool, pPage);
2044 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2045 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2046 if (rc == VINF_SUCCESS)
2047 PGM_INVL_ALL_VCPU_TLBS(pVM);
2048 return rc;
2049}
2050
2051
2052/**
2053 * Checks if a kind mismatch is really a page being reused
2054 * or just a normal remapping.
2055 *
2056 * @returns true if reused and the cached page (enmKind1) should be flushed
2057 * @returns false if not reused.
2058 * @param enmKind1 The kind of the cached page.
2059 * @param enmKind2 The kind of the requested page.
2060 */
2061static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2062{
2063 switch (enmKind1)
2064 {
2065 /*
2066 * Never reuse them. There is no remapping in non-paging mode.
2067 */
2068 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2069 case PGMPOOLKIND_32BIT_PD_PHYS:
2070 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2071 case PGMPOOLKIND_PAE_PD_PHYS:
2072 case PGMPOOLKIND_PAE_PDPT_PHYS:
2073 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2074 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2075 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2076 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2077 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2078 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2079 return false;
2080
2081 /*
2082 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2083 */
2084 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2085 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2086 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2087 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2088 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2089 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2090 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2092 case PGMPOOLKIND_32BIT_PD:
2093 case PGMPOOLKIND_PAE_PDPT:
2094 switch (enmKind2)
2095 {
2096 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2097 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2098 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2099 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2100 case PGMPOOLKIND_64BIT_PML4:
2101 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2102 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2103 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2104 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2105 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2106 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2107 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2108 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2109 return true;
2110 default:
2111 return false;
2112 }
2113
2114 /*
2115 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2116 */
2117 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2118 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2119 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2120 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2121 case PGMPOOLKIND_64BIT_PML4:
2122 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2123 switch (enmKind2)
2124 {
2125 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2126 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2127 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2128 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2129 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2130 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2131 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2133 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2134 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2135 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2136 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2137 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2138 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2139 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2140 return true;
2141 default:
2142 return false;
2143 }
2144
2145 /*
2146 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2147 */
2148 case PGMPOOLKIND_ROOT_NESTED:
2149 return false;
2150
2151 default:
2152 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2153 }
2154}
2155
2156
2157/**
2158 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2159 *
2160 * @returns VBox status code.
2161 * @retval VINF_PGM_CACHED_PAGE on success.
2162 * @retval VERR_FILE_NOT_FOUND if not found.
2163 * @param pPool The pool.
2164 * @param GCPhys The GC physical address of the page we're gonna shadow.
2165 * @param enmKind The kind of mapping.
2166 * @param enmAccess Access type for the mapping (only relevant for big pages)
2167 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2168 * @param iUser The shadow page pool index of the user table. This is
2169 * NIL_PGMPOOL_IDX for root pages.
2170 * @param iUserTable The index into the user table (shadowed). Ignored if
2171 * root page
2172 * @param ppPage Where to store the pointer to the page.
2173 */
2174static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2175 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2176{
2177 /*
2178 * Look up the GCPhys in the hash.
2179 */
2180 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2181 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2182 if (i != NIL_PGMPOOL_IDX)
2183 {
2184 do
2185 {
2186 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2187 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2188 if (pPage->GCPhys == GCPhys)
2189 {
2190 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2191 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2192 && pPage->fA20Enabled == fA20Enabled)
2193 {
2194 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2195 * doesn't flush it in case there are no more free use records.
2196 */
2197 pgmPoolCacheUsed(pPool, pPage);
2198
2199 int rc = VINF_SUCCESS;
2200 if (iUser != NIL_PGMPOOL_IDX)
2201 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2202 if (RT_SUCCESS(rc))
2203 {
2204 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2205 *ppPage = pPage;
2206 if (pPage->cModifications)
2207 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2208 STAM_COUNTER_INC(&pPool->StatCacheHits);
2209 return VINF_PGM_CACHED_PAGE;
2210 }
2211 return rc;
2212 }
2213
2214 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2215 {
2216 /*
2217 * The kind is different. In some cases we should now flush the page
2218 * as it has been reused, but in most cases this is normal remapping
2219 * of PDs as PT or big pages using the GCPhys field in a slightly
2220 * different way than the other kinds.
2221 */
2222 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2223 {
2224 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2225 pgmPoolFlushPage(pPool, pPage);
2226 break;
2227 }
2228 }
2229 }
2230
2231 /* next */
2232 i = pPage->iNext;
2233 } while (i != NIL_PGMPOOL_IDX);
2234 }
2235
2236 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2237 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2238 return VERR_FILE_NOT_FOUND;
2239}
2240
2241
2242/**
2243 * Inserts a page into the cache.
2244 *
2245 * @param pPool The pool.
2246 * @param pPage The cached page.
2247 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2248 */
2249static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2250{
2251 /*
2252 * Insert into the GCPhys hash if the page is fit for that.
2253 */
2254 Assert(!pPage->fCached);
2255 if (fCanBeCached)
2256 {
2257 pPage->fCached = true;
2258 pgmPoolHashInsert(pPool, pPage);
2259 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2260 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2261 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2262 }
2263 else
2264 {
2265 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2266 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2267 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2268 }
2269
2270 /*
2271 * Insert at the head of the age list.
2272 */
2273 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2274 pPage->iAgeNext = pPool->iAgeHead;
2275 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2276 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2277 else
2278 pPool->iAgeTail = pPage->idx;
2279 pPool->iAgeHead = pPage->idx;
2280}
2281
2282
2283/**
2284 * Flushes a cached page.
2285 *
2286 * @param pPool The pool.
2287 * @param pPage The cached page.
2288 */
2289static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2290{
2291 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2292
2293 /*
2294 * Remove the page from the hash.
2295 */
2296 if (pPage->fCached)
2297 {
2298 pPage->fCached = false;
2299 pgmPoolHashRemove(pPool, pPage);
2300 }
2301 else
2302 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2303
2304 /*
2305 * Remove it from the age list.
2306 */
2307 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2308 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2309 else
2310 pPool->iAgeTail = pPage->iAgePrev;
2311 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2312 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2313 else
2314 pPool->iAgeHead = pPage->iAgeNext;
2315 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2316 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2317}
2318
2319
2320/**
2321 * Looks for pages sharing the monitor.
2322 *
2323 * @returns Pointer to the head page.
2324 * @returns NULL if not found.
2325 * @param pPool The Pool
2326 * @param pNewPage The page which is going to be monitored.
2327 */
2328static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2329{
2330 /*
2331 * Look up the GCPhys in the hash.
2332 */
2333 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2334 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2335 if (i == NIL_PGMPOOL_IDX)
2336 return NULL;
2337 do
2338 {
2339 PPGMPOOLPAGE pPage = &pPool->aPages[i];
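        /* Any pool page backed by the same guest physical page shares the monitor;
           GCPhys may include a sub-page offset for some kinds, hence the range check. */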
2340 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2341 && pPage != pNewPage)
2342 {
2343 switch (pPage->enmKind)
2344 {
2345 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2346 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2347 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2348 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2349 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2350 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2351 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2352 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2353 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2354 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2355 case PGMPOOLKIND_64BIT_PML4:
2356 case PGMPOOLKIND_32BIT_PD:
2357 case PGMPOOLKIND_PAE_PDPT:
2358 {
2359 /* find the head */
2360 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2361 {
2362 Assert(pPage->iMonitoredPrev != pPage->idx);
2363 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2364 }
2365 return pPage;
2366 }
2367
2368 /* ignore, no monitoring. */
2369 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2370 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2371 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2372 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2373 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2374 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2375 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2376 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2377 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2378 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2379 case PGMPOOLKIND_ROOT_NESTED:
2380 case PGMPOOLKIND_PAE_PD_PHYS:
2381 case PGMPOOLKIND_PAE_PDPT_PHYS:
2382 case PGMPOOLKIND_32BIT_PD_PHYS:
2383 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2384 break;
2385 default:
2386 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2387 }
2388 }
2389
2390 /* next */
2391 i = pPage->iNext;
2392 } while (i != NIL_PGMPOOL_IDX);
2393 return NULL;
2394}
2395
2396
2397/**
2398 * Enables write monitoring of a guest page.
2399 *
2400 * @returns VBox status code.
2401 * @retval VINF_SUCCESS on success.
2402 * @param pPool The pool.
2403 * @param pPage The cached page.
2404 */
2405static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2406{
2407 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2408
2409 /*
2410 * Filter out the relevant kinds.
2411 */
2412 switch (pPage->enmKind)
2413 {
2414 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2415 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2416 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2417 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2418 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2419 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2420 case PGMPOOLKIND_64BIT_PML4:
2421 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2422 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2423 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2425 case PGMPOOLKIND_32BIT_PD:
2426 case PGMPOOLKIND_PAE_PDPT:
2427 break;
2428
2429 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2430 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2431 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2432 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2433 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2434 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2435 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2436 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2437 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2438 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2439 case PGMPOOLKIND_ROOT_NESTED:
2440 /* Nothing to monitor here. */
2441 return VINF_SUCCESS;
2442
2443 case PGMPOOLKIND_32BIT_PD_PHYS:
2444 case PGMPOOLKIND_PAE_PDPT_PHYS:
2445 case PGMPOOLKIND_PAE_PD_PHYS:
2446 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2447 /* Nothing to monitor here. */
2448 return VINF_SUCCESS;
2449 default:
2450 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2451 }
2452
2453 /*
2454 * Install handler.
2455 */
2456 int rc;
2457 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2458 if (pPageHead)
2459 {
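        /* Another pool page already monitors this guest page; just link this page
           into the existing monitor chain instead of registering a new handler. */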
2460 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2461 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2462
2463#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2464 if (pPageHead->fDirty)
2465 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2466#endif
2467
2468 pPage->iMonitoredPrev = pPageHead->idx;
2469 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2470 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2471 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2472 pPageHead->iMonitoredNext = pPage->idx;
2473 rc = VINF_SUCCESS;
2474 }
2475 else
2476 {
2477 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2478 PVMCC pVM = pPool->CTX_SUFF(pVM);
2479 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2480 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2481 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2482 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2483 * the heap size should suffice. */
2484 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2485 PVMCPU pVCpu = VMMGetCpu(pVM);
2486 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2487 }
2488 pPage->fMonitored = true;
2489 return rc;
2490}
2491
2492
2493/**
2494 * Disables write monitoring of a guest page.
2495 *
2496 * @returns VBox status code.
2497 * @retval VINF_SUCCESS on success.
2498 * @param pPool The pool.
2499 * @param pPage The cached page.
2500 */
2501static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2502{
2503 /*
2504 * Filter out the relevant kinds.
2505 */
2506 switch (pPage->enmKind)
2507 {
2508 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2509 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2510 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2511 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2512 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2513 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2514 case PGMPOOLKIND_64BIT_PML4:
2515 case PGMPOOLKIND_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PDPT:
2517 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2519 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2520 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2521 break;
2522
2523 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2524 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2525 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2526 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2527 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2528 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2529 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2530 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2531 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2533 case PGMPOOLKIND_ROOT_NESTED:
2534 case PGMPOOLKIND_PAE_PD_PHYS:
2535 case PGMPOOLKIND_PAE_PDPT_PHYS:
2536 case PGMPOOLKIND_32BIT_PD_PHYS:
2537 /* Nothing to monitor here. */
2538 Assert(!pPage->fMonitored);
2539 return VINF_SUCCESS;
2540
2541 default:
2542 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2543 }
2544 Assert(pPage->fMonitored);
2545
2546 /*
2547 * Remove the page from the monitored list or uninstall it if last.
2548 */
2549 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2550 int rc;
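    /* If the page is part of a monitor chain, unlink it; when it is the chain head,
       the physical handler's user argument must be repointed at the new head. */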
2551 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2552 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2553 {
2554 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2555 {
2556 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2557 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2558 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2559
2560 AssertFatalRCSuccess(rc);
2561 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2562 }
2563 else
2564 {
2565 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2566 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2567 {
2568 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2569 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2570 }
2571 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2572 rc = VINF_SUCCESS;
2573 }
2574 }
2575 else
2576 {
2577 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2578 AssertFatalRC(rc);
2579 PVMCPU pVCpu = VMMGetCpu(pVM);
2580 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2581 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2582 }
2583 pPage->fMonitored = false;
2584
2585 /*
2586 * Remove it from the list of modified pages (if in it).
2587 */
2588 pgmPoolMonitorModifiedRemove(pPool, pPage);
2589
2590 return rc;
2591}
2592
2593
2594/**
2595 * Inserts the page into the list of modified pages.
2596 *
2597 * @param pPool The pool.
2598 * @param pPage The page.
2599 */
2600void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2601{
2602 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2603 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2604 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2605 && pPool->iModifiedHead != pPage->idx,
2606 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2607 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2608 pPool->iModifiedHead, pPool->cModifiedPages));
2609
2610 pPage->iModifiedNext = pPool->iModifiedHead;
2611 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2612 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2613 pPool->iModifiedHead = pPage->idx;
2614 pPool->cModifiedPages++;
2615#ifdef VBOX_WITH_STATISTICS
2616 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2617 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2618#endif
2619}
2620
2621
2622/**
2623 * Removes the page from the list of modified pages and resets the
2624 * modification counter.
2625 *
2626 * @param pPool The pool.
2627 * @param pPage The page which is believed to be in the list of modified pages.
2628 */
2629static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2630{
2631 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2632 if (pPool->iModifiedHead == pPage->idx)
2633 {
2634 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2635 pPool->iModifiedHead = pPage->iModifiedNext;
2636 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2637 {
2638 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2639 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2640 }
2641 pPool->cModifiedPages--;
2642 }
2643 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2644 {
2645 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2646 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2647 {
2648 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2649 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2650 }
2651 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2652 pPool->cModifiedPages--;
2653 }
2654 else
2655 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2656 pPage->cModifications = 0;
2657}
2658
2659
2660/**
2661 * Zaps the list of modified pages, resetting their modification counters in the process.
2662 *
2663 * @param pVM The cross context VM structure.
2664 */
2665static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2666{
2667 PGM_LOCK_VOID(pVM);
2668 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2669 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2670
2671 unsigned cPages = 0; NOREF(cPages);
2672
2673#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2674 pgmPoolResetDirtyPages(pVM);
2675#endif
2676
2677 uint16_t idx = pPool->iModifiedHead;
2678 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2679 while (idx != NIL_PGMPOOL_IDX)
2680 {
2681 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2682 idx = pPage->iModifiedNext;
2683 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2684 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2685 pPage->cModifications = 0;
2686 Assert(++cPages);
2687 }
2688 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2689 pPool->cModifiedPages = 0;
2690 PGM_UNLOCK(pVM);
2691}
2692
2693
2694/**
2695 * Handle SyncCR3 pool tasks
2696 *
2697 * @returns VBox status code.
2698 * @retval VINF_SUCCESS on success.
2699 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2700 * @param pVCpu The cross context virtual CPU structure.
2701 * @remark Should only be used when monitoring is available, thus placed in
2702 * the PGMPOOL_WITH_MONITORING \#ifdef.
2703 */
2704int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2705{
2706 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2707 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2708
2709 /*
2710 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2711 * Occasionally we will have to clear all the shadow page tables because we wanted
2712 * to monitor a page which was mapped by too many shadowed page tables. This operation
2713 * is sometimes referred to as a 'lightweight flush'.
2714 */
2715# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2716 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2717 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2718# else /* !IN_RING3 */
2719 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2720 {
2721 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2722 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2723
2724 /* Make sure all other VCPUs return to ring 3. */
2725 if (pVM->cCpus > 1)
2726 {
2727 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2728 PGM_INVL_ALL_VCPU_TLBS(pVM);
2729 }
2730 return VINF_PGM_SYNC_CR3;
2731 }
2732# endif /* !IN_RING3 */
2733 else
2734 {
2735 pgmPoolMonitorModifiedClearAll(pVM);
2736
2737 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2738 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2739 {
2740 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2741 return pgmPoolSyncCR3(pVCpu);
2742 }
2743 }
2744 return VINF_SUCCESS;
2745}
2746
2747
2748/**
2749 * Frees up at least one user entry.
2750 *
2751 * @returns VBox status code.
2752 * @retval VINF_SUCCESS on success.
2753 *
2754 * @param pPool The pool.
2755 * @param iUser The user index.
2756 */
2757static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2758{
2759 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2760 /*
2761 * Just free cached pages in a braindead fashion.
2762 */
2763 /** @todo walk the age list backwards and free the first with usage. */
2764 int rc = VINF_SUCCESS;
2765 do
2766 {
2767 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2768 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2769 rc = rc2;
2770 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2771 return rc;
2772}
2773
2774
2775/**
2776 * Inserts a page into the cache.
2777 *
2778 * This will create a user node for the page, insert it into the GCPhys
2779 * hash, and insert it into the age list.
2780 *
2781 * @returns VBox status code.
2782 * @retval VINF_SUCCESS if successfully added.
2783 *
2784 * @param pPool The pool.
2785 * @param pPage The cached page.
2786 * @param GCPhys The GC physical address of the page we're gonna shadow.
2787 * @param iUser The user index.
2788 * @param iUserTable The user table index.
2789 */
2790DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2791{
2792 int rc = VINF_SUCCESS;
2793 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2794
2795 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2796
2797 if (iUser != NIL_PGMPOOL_IDX)
2798 {
2799#ifdef VBOX_STRICT
2800 /*
2801 * Check that the entry doesn't already exist.
2802 */
2803 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2804 {
2805 uint16_t i = pPage->iUserHead;
2806 do
2807 {
2808 Assert(i < pPool->cMaxUsers);
2809 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2810 i = paUsers[i].iNext;
2811 } while (i != NIL_PGMPOOL_USER_INDEX);
2812 }
2813#endif
2814
2815 /*
2816 * Find a free user node.
2817 */
2818 uint16_t i = pPool->iUserFreeHead;
2819 if (i == NIL_PGMPOOL_USER_INDEX)
2820 {
2821 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2822 if (RT_FAILURE(rc))
2823 return rc;
2824 i = pPool->iUserFreeHead;
2825 }
2826
2827 /*
2828 * Unlink the user node from the free list,
2829 * initialize and insert it into the user list.
2830 */
2831 pPool->iUserFreeHead = paUsers[i].iNext;
2832 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2833 paUsers[i].iUser = iUser;
2834 paUsers[i].iUserTable = iUserTable;
2835 pPage->iUserHead = i;
2836 }
2837 else
2838 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2839
2840
2841 /*
2842 * Insert into cache and enable monitoring of the guest page if enabled.
2843 *
2844 * Until we implement caching of all levels, including the CR3 one, we'll
2845 * have to make sure we don't try monitor & cache any recursive reuse of
2846 * a monitored CR3 page. Because all Windows versions are doing this we'll
2847 * have to be able to do combined access monitoring, CR3 + PT and
2848 * PD + PT (guest PAE).
2849 *
2850 * Update:
2851 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2852 */
2853 const bool fCanBeMonitored = true;
2854 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2855 if (fCanBeMonitored)
2856 {
2857 rc = pgmPoolMonitorInsert(pPool, pPage);
2858 AssertRC(rc);
2859 }
2860 return rc;
2861}
2862
2863
2864/**
2865 * Adds a user reference to a page.
2866 *
2867 * This will move the page to the head of the age list.
2868 *
2869 * @returns VBox status code.
2870 * @retval VINF_SUCCESS if successfully added.
2871 *
2872 * @param pPool The pool.
2873 * @param pPage The cached page.
2874 * @param iUser The user index.
2875 * @param iUserTable The user table.
2876 */
2877static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2878{
2879 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2880 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2881 Assert(iUser != NIL_PGMPOOL_IDX);
2882
2883# ifdef VBOX_STRICT
2884 /*
2885 * Check that the entry doesn't already exist. We only allow multiple
2886 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2887 */
2888 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2889 {
2890 uint16_t i = pPage->iUserHead;
2891 do
2892 {
2893 Assert(i < pPool->cMaxUsers);
2894 /** @todo this assertion looks odd... Shouldn't it be && here? */
2895 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2896 i = paUsers[i].iNext;
2897 } while (i != NIL_PGMPOOL_USER_INDEX);
2898 }
2899# endif
2900
2901 /*
2902 * Allocate a user node.
2903 */
2904 uint16_t i = pPool->iUserFreeHead;
2905 if (i == NIL_PGMPOOL_USER_INDEX)
2906 {
2907 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2908 if (RT_FAILURE(rc))
2909 return rc;
2910 i = pPool->iUserFreeHead;
2911 }
2912 pPool->iUserFreeHead = paUsers[i].iNext;
2913
2914 /*
2915 * Initialize the user node and insert it.
2916 */
2917 paUsers[i].iNext = pPage->iUserHead;
2918 paUsers[i].iUser = iUser;
2919 paUsers[i].iUserTable = iUserTable;
2920 pPage->iUserHead = i;
2921
2922# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2923 if (pPage->fDirty)
2924 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2925# endif
2926
2927 /*
2928 * Tell the cache to update its replacement stats for this page.
2929 */
2930 pgmPoolCacheUsed(pPool, pPage);
2931 return VINF_SUCCESS;
2932}
2933
2934
2935/**
2936 * Frees a user record associated with a page.
2937 *
2938 * This does not clear the entry in the user table, it simply returns the
2939 * user record to the chain of free records.
2940 *
2941 * @param pPool The pool.
2942 * @param pPage The shadow page.
2943 * @param iUser The shadow page pool index of the user table.
2944 * @param iUserTable The index into the user table (shadowed).
2945 *
2946 * @remarks Don't call this for root pages.
2947 */
2948static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2949{
2950 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2951 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2952 Assert(iUser != NIL_PGMPOOL_IDX);
2953
2954 /*
2955 * Unlink and free the specified user entry.
2956 */
2957
2958 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2959 uint16_t i = pPage->iUserHead;
2960 if ( i != NIL_PGMPOOL_USER_INDEX
2961 && paUsers[i].iUser == iUser
2962 && paUsers[i].iUserTable == iUserTable)
2963 {
2964 pPage->iUserHead = paUsers[i].iNext;
2965
2966 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2967 paUsers[i].iNext = pPool->iUserFreeHead;
2968 pPool->iUserFreeHead = i;
2969 return;
2970 }
2971
2972 /* General: Linear search. */
2973 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2974 while (i != NIL_PGMPOOL_USER_INDEX)
2975 {
2976 if ( paUsers[i].iUser == iUser
2977 && paUsers[i].iUserTable == iUserTable)
2978 {
2979 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2980 paUsers[iPrev].iNext = paUsers[i].iNext;
2981 else
2982 pPage->iUserHead = paUsers[i].iNext;
2983
2984 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2985 paUsers[i].iNext = pPool->iUserFreeHead;
2986 pPool->iUserFreeHead = i;
2987 return;
2988 }
2989 iPrev = i;
2990 i = paUsers[i].iNext;
2991 }
2992
2993 /* Fatal: didn't find it */
2994 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2995 iUser, iUserTable, pPage->GCPhys));
2996}
2997
2998
2999#if 0 /* unused */
3000/**
3001 * Gets the entry size of a shadow table.
3002 *
3003 * @param enmKind The kind of page.
3004 *
3005 * @returns The size of the entry in bytes. That is, 4 or 8.
3006 * @returns If the kind is not for a table, an assertion is raised and 0 is
3007 * returned.
3008 */
3009DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3010{
3011 switch (enmKind)
3012 {
3013 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3014 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3015 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3016 case PGMPOOLKIND_32BIT_PD:
3017 case PGMPOOLKIND_32BIT_PD_PHYS:
3018 return 4;
3019
3020 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3021 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3023 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3024 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3025 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3026 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3027 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3029 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3030 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3031 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3032 case PGMPOOLKIND_64BIT_PML4:
3033 case PGMPOOLKIND_PAE_PDPT:
3034 case PGMPOOLKIND_ROOT_NESTED:
3035 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3036 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3037 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3038 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3039 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3040 case PGMPOOLKIND_PAE_PD_PHYS:
3041 case PGMPOOLKIND_PAE_PDPT_PHYS:
3042 return 8;
3043
3044 default:
3045 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3046 }
3047}
3048#endif /* unused */
3049
3050#if 0 /* unused */
3051/**
3052 * Gets the entry size of a guest table.
3053 *
3054 * @param enmKind The kind of page.
3055 *
3056 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3057 * @returns If the kind is not for a table, an assertion is raised and 0 is
3058 * returned.
3059 */
3060DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3061{
3062 switch (enmKind)
3063 {
3064 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3065 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3066 case PGMPOOLKIND_32BIT_PD:
3067 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3068 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3069 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3070 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3071 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3072 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3073 return 4;
3074
3075 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3076 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3077 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3078 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3079 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3080 case PGMPOOLKIND_64BIT_PML4:
3081 case PGMPOOLKIND_PAE_PDPT:
3082 return 8;
3083
3084 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3085 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3086 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3087 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3088 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3090 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3091 case PGMPOOLKIND_ROOT_NESTED:
3092 case PGMPOOLKIND_PAE_PD_PHYS:
3093 case PGMPOOLKIND_PAE_PDPT_PHYS:
3094 case PGMPOOLKIND_32BIT_PD_PHYS:
3095 /** @todo can we return 0? (nobody is calling this...) */
3096 AssertFailed();
3097 return 0;
3098
3099 default:
3100 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3101 }
3102}
3103#endif /* unused */
3104
3105
3106/**
3107 * Checks one shadow page table entry for a mapping of a physical page.
3108 *
3109 * @returns true / false indicating removal of all relevant PTEs
3110 *
3111 * @param pVM The cross context VM structure.
3112 * @param pPhysPage The guest page in question.
3113 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3114 * @param iShw The shadow page table.
3115 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3116 */
3117static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3118{
3119 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3120 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3121 bool fRet = false;
3122
3123 /*
3124 * Assert sanity.
3125 */
3126 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3127 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3128 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3129
3130 /*
3131 * Then, clear the actual mappings to the page in the shadow PT.
3132 */
3133 switch (pPage->enmKind)
3134 {
3135 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3136 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3137 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3138 {
3139 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3140 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3141 uint32_t u32AndMask = 0;
3142 uint32_t u32OrMask = 0;
3143
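            /* When keeping the PTE (fFlushPTEs == false), only the RW bit is adjusted to
               match the current handler state; otherwise the entry is zapped below. */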
3144 if (!fFlushPTEs)
3145 {
3146 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3147 {
3148 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3149 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3150 u32OrMask = X86_PTE_RW;
3151 u32AndMask = UINT32_MAX;
3152 fRet = true;
3153 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3154 break;
3155
3156 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3157 u32OrMask = 0;
3158 u32AndMask = ~X86_PTE_RW;
3159 fRet = true;
3160 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3161 break;
3162 default:
3163 /* (shouldn't be here, will assert below) */
3164 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3165 break;
3166 }
3167 }
3168 else
3169 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3170
3171 /* Update the counter if we're removing references. */
3172 if (!u32AndMask)
3173 {
3174 Assert(pPage->cPresent);
3175 Assert(pPool->cPresent);
3176 pPage->cPresent--;
3177 pPool->cPresent--;
3178 }
3179
3180 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3181 {
3182 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3183 X86PTE Pte;
3184 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3185 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3186 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3187 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3188 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3189 return fRet;
3190 }
3191#ifdef LOG_ENABLED
3192 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3193 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3194 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3195 {
3196 Log(("i=%d cFound=%d\n", i, ++cFound));
3197 }
3198#endif
3199 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3200 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3201 break;
3202 }
3203
3204 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3206 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3207 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3208 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3209 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3210 {
3211 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3212 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3213 uint64_t u64OrMask = 0;
3214 uint64_t u64AndMask = 0;
3215
3216 if (!fFlushPTEs)
3217 {
3218 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3219 {
3220 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3221 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3222 u64OrMask = X86_PTE_RW;
3223 u64AndMask = UINT64_MAX;
3224 fRet = true;
3225 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3226 break;
3227
3228 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3229 u64OrMask = 0;
3230 u64AndMask = ~(uint64_t)X86_PTE_RW;
3231 fRet = true;
3232 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3233 break;
3234
3235 default:
3236 /* (shouldn't be here, will assert below) */
3237 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3238 break;
3239 }
3240 }
3241 else
3242 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3243
3244 /* Update the counter if we're removing references. */
3245 if (!u64AndMask)
3246 {
3247 Assert(pPage->cPresent);
3248 Assert(pPool->cPresent);
3249 pPage->cPresent--;
3250 pPool->cPresent--;
3251 }
3252
3253 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3254 {
3255 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3256 X86PTEPAE Pte;
3257 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3258 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3259 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3260
3261 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3262 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3263 return fRet;
3264 }
3265#ifdef LOG_ENABLED
3266 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3267 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3268 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3269 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3270 Log(("i=%d cFound=%d\n", i, ++cFound));
3271#endif
3272 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3273 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3274 break;
3275 }
3276
3277#ifdef PGM_WITH_LARGE_PAGES
3278 /* Large page case only. */
3279 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3280 {
3281 Assert(pVM->pgm.s.fNestedPaging);
3282
3283 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3284 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3285
3286 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3287 {
3288 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3289 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3290 pPD->a[iPte].u = 0;
3291 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3292
3293 /* Update the counter as we're removing references. */
3294 Assert(pPage->cPresent);
3295 Assert(pPool->cPresent);
3296 pPage->cPresent--;
3297 pPool->cPresent--;
3298
3299 return fRet;
3300 }
3301# ifdef LOG_ENABLED
3302 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3303 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3304 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3305 Log(("i=%d cFound=%d\n", i, ++cFound));
3306# endif
3307 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3308 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3309 break;
3310 }
3311
3312 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3313 case PGMPOOLKIND_PAE_PD_PHYS:
3314 {
3315 Assert(pVM->pgm.s.fNestedPaging);
3316
3317 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3318 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3319
3320 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3321 {
3322 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3323 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3324 pPD->a[iPte].u = 0;
3325 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3326
3327 /* Update the counter as we're removing references. */
3328 Assert(pPage->cPresent);
3329 Assert(pPool->cPresent);
3330 pPage->cPresent--;
3331 pPool->cPresent--;
3332 return fRet;
3333 }
3334# ifdef LOG_ENABLED
3335 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3336 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3337 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3338 Log(("i=%d cFound=%d\n", i, ++cFound));
3339# endif
3340 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3341 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3342 break;
3343 }
3344#endif /* PGM_WITH_LARGE_PAGES */
3345
3346 default:
3347 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3348 }
3349
3350 /* not reached. */
3351#ifndef _MSC_VER
3352 return fRet;
3353#endif
3354}
3355
3356
3357/**
3358 * Scans one shadow page table for mappings of a physical page.
3359 *
3360 * @param pVM The cross context VM structure.
3361 * @param pPhysPage The guest page in question.
3362 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3363 * @param iShw The index of the shadow page table.
3364 */
3365static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3366{
3367 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3368
3369 /* We should only come here when there's only one reference to this physical page. */
3370 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3371
3372 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3373 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3374 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3375 if (!fKeptPTEs)
3376 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3377 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3378}
3379
3380
3381/**
3382 * Flushes a list of shadow page tables mapping the same physical page.
3383 *
3384 * @param pVM The cross context VM structure.
3385 * @param pPhysPage The guest page in question.
3386 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3387 * @param iPhysExt The physical cross reference extent list to flush.
3388 */
3389static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3390{
3391 PGM_LOCK_ASSERT_OWNER(pVM);
3392 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3393 bool fKeepList = false;
3394
3395 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3396 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3397
3398 const uint16_t iPhysExtStart = iPhysExt;
3399 PPGMPOOLPHYSEXT pPhysExt;
3400 do
3401 {
3402 Assert(iPhysExt < pPool->cMaxPhysExts);
3403 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3404 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3405 {
3406 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3407 {
3408 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3409 if (!fKeptPTEs)
3410 {
3411 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3412 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3413 }
3414 else
3415 fKeepList = true;
3416 }
3417 }
3418 /* next */
3419 iPhysExt = pPhysExt->iNext;
3420 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3421
3422 if (!fKeepList)
3423 {
3424 /* insert the list into the free list and clear the ram range entry. */
3425 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3426 pPool->iPhysExtFreeHead = iPhysExtStart;
3427 /* Invalidate the tracking data. */
3428 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3429 }
3430
3431 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3432}
3433
3434
3435/**
3436 * Flushes all shadow page table mappings of the given guest page.
3437 *
3438 * This is typically called when the host page backing the guest one has been
3439 * replaced or when the page protection was changed due to a guest access
3440 * caught by the monitoring.
3441 *
3442 * @returns VBox status code.
3443 * @retval VINF_SUCCESS if all references have been successfully cleared.
3444 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3445 * pool cleaning. FF and sync flags are set.
3446 *
3447 * @param pVM The cross context VM structure.
3448 * @param GCPhysPage GC physical address of the page in question
3449 * @param pPhysPage The guest page in question.
3450 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3451 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3452 * flushed; it is NOT touched if this isn't necessary.
3453 * The caller MUST initialize this to @a false.
3454 */
3455int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3456{
3457 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3458 PGM_LOCK_VOID(pVM);
3459 int rc = VINF_SUCCESS;
3460
3461#ifdef PGM_WITH_LARGE_PAGES
3462 /* Is this page part of a large page? */
3463 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3464 {
3465 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3466 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3467
3468 /* Fetch the large page base. */
3469 PPGMPAGE pLargePage;
3470 if (GCPhysBase != GCPhysPage)
3471 {
3472 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3473 AssertFatal(pLargePage);
3474 }
3475 else
3476 pLargePage = pPhysPage;
3477
3478 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3479
3480 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3481 {
3482 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3483 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3484 pVM->pgm.s.cLargePagesDisabled++;
3485
3486 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3487 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3488
3489 *pfFlushTLBs = true;
3490 PGM_UNLOCK(pVM);
3491 return rc;
3492 }
3493 }
3494#else
3495 NOREF(GCPhysPage);
3496#endif /* PGM_WITH_LARGE_PAGES */
3497
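 /*
 * Decode the 16-bit tracking word: it either holds a small reference count
 * together with the index of the single shadow page table referencing the
 * page (the common case), or, when the count field equals
 * PGMPOOL_TD_CREFS_PHYSEXT, the index of a physical cross reference extent
 * list -- or PGMPOOL_TD_IDX_OVERFLOWED when even that overflowed and only
 * the slow full scan can clean things up.
 */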
3498 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3499 if (u16)
3500 {
3501 /*
3502 * The zero page is currently screwing up the tracking and we'll
3503 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3504 * is defined, zero pages won't normally be mapped. Some kind of solution
3505 * will be needed for this problem of course, but it will have to wait...
3506 */
3507 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3508 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3509 rc = VINF_PGM_GCPHYS_ALIASED;
3510 else
3511 {
3512 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3513 {
3514 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3515 pgmPoolTrackFlushGCPhysPT(pVM,
3516 pPhysPage,
3517 fFlushPTEs,
3518 PGMPOOL_TD_GET_IDX(u16));
3519 }
3520 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3521 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3522 else
3523 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3524 *pfFlushTLBs = true;
3525 }
3526 }
3527
3528 if (rc == VINF_PGM_GCPHYS_ALIASED)
3529 {
3530 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3531 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3532 rc = VINF_PGM_SYNC_CR3;
3533 }
3534 PGM_UNLOCK(pVM);
3535 return rc;
3536}
3537
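/*
 * Illustrative call pattern for pgmPoolTrackUpdateGCPhys (a sketch only; the
 * variables are placeholders): the caller pre-initializes the TLB flush
 * indicator and acts on it afterwards, as required by the contract above.
 *
 *     bool fFlushTLBs = false;
 *     int  rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhys, pPhysPage, true, &fFlushTLBs);
 *     if (fFlushTLBs)
 *         PGM_INVL_ALL_VCPU_TLBS(pVM);
 */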
3538
3539/**
3540 * Scans all shadow page tables for mappings of a physical page.
3541 *
3542 * This may be slow, but it's most likely more efficient than cleaning
3543 * out the entire page pool / cache.
3544 *
3545 * @returns VBox status code.
3546 * @retval VINF_SUCCESS if all references have been successfully cleared.
3547 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3548 * a page pool cleaning.
3549 *
3550 * @param pVM The cross context VM structure.
3551 * @param pPhysPage The guest page in question.
3552 */
3553int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3554{
3555 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3556 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3557 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3558 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3559
3560 /*
3561 * There is a limit to what makes sense.
3562 */
3563 if ( pPool->cPresent > 1024
3564 && pVM->cCpus == 1)
3565 {
3566 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3567 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3568 return VINF_PGM_GCPHYS_ALIASED;
3569 }
3570
3571 /*
3572 * Iterate all the pages until we've encountered all those in use.
3573 * This is a simple but not quite optimal solution.
3574 */
3575 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3576 unsigned cLeft = pPool->cUsedPages;
3577 unsigned iPage = pPool->cCurPages;
3578 while (--iPage >= PGMPOOL_IDX_FIRST)
3579 {
3580 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3581 if ( pPage->GCPhys != NIL_RTGCPHYS
3582 && pPage->cPresent)
3583 {
3584 switch (pPage->enmKind)
3585 {
3586 /*
3587 * We only care about shadow page tables.
3588 */
3589 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3590 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3591 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3592 {
3593 const uint32_t u32 = (uint32_t)u64;
3594 unsigned cPresent = pPage->cPresent;
3595 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3596 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3597 {
3598 const X86PGUINT uPte = pPT->a[i].u;
3599 if (uPte & X86_PTE_P)
3600 {
3601 if ((uPte & X86_PTE_PG_MASK) == u32)
3602 {
3603 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3604 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3605
3606 /* Update the counter as we're removing references. */
3607 Assert(pPage->cPresent);
3608 Assert(pPool->cPresent);
3609 pPage->cPresent--;
3610 pPool->cPresent--;
3611 }
3612 if (!--cPresent)
3613 break;
3614 }
3615 }
3616 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3617 break;
3618 }
3619
3620 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3621 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3622 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3623 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3624 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3625 {
3626 unsigned cPresent = pPage->cPresent;
3627 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3628 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3629 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3630 {
3631 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3632 {
3633 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3634 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3635
3636 /* Update the counter as we're removing references. */
3637 Assert(pPage->cPresent);
3638 Assert(pPool->cPresent);
3639 pPage->cPresent--;
3640 pPool->cPresent--;
3641 }
3642 if (!--cPresent)
3643 break;
3644 }
3645 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3646 break;
3647 }
3648
3649 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3650 {
3651 unsigned cPresent = pPage->cPresent;
3652 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3653 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3654 {
3655 X86PGPAEUINT const uPte = pPT->a[i].u;
3656 if (uPte & EPT_E_READ)
3657 {
3658 if ((uPte & EPT_PTE_PG_MASK) == u64)
3659 {
3660 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3661 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3662
3663 /* Update the counter as we're removing references. */
3664 Assert(pPage->cPresent);
3665 Assert(pPool->cPresent);
3666 pPage->cPresent--;
3667 pPool->cPresent--;
3668 }
3669 if (!--cPresent)
3670 break;
3671 }
3672 }
3673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3674 break;
3675 }
3676 }
3677
3678 if (!--cLeft)
3679 break;
3680 }
3681 }
3682
3683 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3684 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3685
3686 /*
3687 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3688 */
3689 if (pPool->cPresent > 1024)
3690 {
3691 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3692 return VINF_PGM_GCPHYS_ALIASED;
3693 }
3694
3695 return VINF_SUCCESS;
3696}
3697
3698
3699/**
3700 * Clears the user entry in a user table.
3701 *
3702 * This is used to remove all references to a page when flushing it.
3703 *
 * @param pPool The pool.
 * @param pPage The shadow page being flushed.
 * @param pUser The user record identifying the user table and slot to clear.
 */
3704static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3705{
3706 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3707 Assert(pUser->iUser < pPool->cCurPages);
3708 uint32_t iUserTable = pUser->iUserTable;
3709
3710 /*
3711 * Map the user page. Ignore references made by fictitious pages.
3712 */
3713 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3714 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3715 union
3716 {
3717 uint64_t *pau64;
3718 uint32_t *pau32;
3719 } u;
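 /* The width of the entry to clear depends on the kind of the owning (user)
 page, hence the 32-bit and 64-bit views of the same mapping above. */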
3720 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3721 {
3722 Assert(!pUserPage->pvPageR3);
3723 return;
3724 }
3725 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3726
3727
3728 /* Safety precaution in case we change the paging for other modes too in the future. */
3729 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3730
3731#ifdef VBOX_STRICT
3732 /*
3733 * Some sanity checks.
3734 */
3735 switch (pUserPage->enmKind)
3736 {
3737 case PGMPOOLKIND_32BIT_PD:
3738 case PGMPOOLKIND_32BIT_PD_PHYS:
3739 Assert(iUserTable < X86_PG_ENTRIES);
3740 break;
3741 case PGMPOOLKIND_PAE_PDPT:
3742 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3743 case PGMPOOLKIND_PAE_PDPT_PHYS:
3744 Assert(iUserTable < 4);
3745 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3746 break;
3747 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3748 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3749 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3750 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3751 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3752 case PGMPOOLKIND_PAE_PD_PHYS:
3753 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3754 break;
3755 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3756 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3757 break;
3758 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3759 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3760 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3761 break;
3762 case PGMPOOLKIND_64BIT_PML4:
3763 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3764 /* GCPhys >> PAGE_SHIFT is the index here */
3765 break;
3766 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3767 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3768 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3769 break;
3770
3771 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3772 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3773 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3774 break;
3775
3776 case PGMPOOLKIND_ROOT_NESTED:
3777 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3778 break;
3779
3780 default:
3781 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3782 break;
3783 }
3784#endif /* VBOX_STRICT */
3785
3786 /*
3787 * Clear the entry in the user page.
3788 */
3789 switch (pUserPage->enmKind)
3790 {
3791 /* 32-bit entries */
3792 case PGMPOOLKIND_32BIT_PD:
3793 case PGMPOOLKIND_32BIT_PD_PHYS:
3794 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3795 break;
3796
3797 /* 64-bit entries */
3798 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3799 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3800 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3801 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3802 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3803 case PGMPOOLKIND_PAE_PD_PHYS:
3804 case PGMPOOLKIND_PAE_PDPT_PHYS:
3805 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3806 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3807 case PGMPOOLKIND_64BIT_PML4:
3808 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3809 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3810 case PGMPOOLKIND_PAE_PDPT:
3811 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3812 case PGMPOOLKIND_ROOT_NESTED:
3813 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3814 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3815 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3816 break;
3817
3818 default:
3819 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3820 }
3821 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3822}
3823
3824
3825/**
3826 * Clears all users of a page.
3827 *
 * @param pPool The pool.
 * @param pPage The page whose user records should be cleared and freed.
 */
3828static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3829{
3830 /*
3831 * Free all the user records.
3832 */
3833 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3834
3835 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3836 uint16_t i = pPage->iUserHead;
3837 while (i != NIL_PGMPOOL_USER_INDEX)
3838 {
3839 /* Clear entry in user table. */
3840 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3841
3842 /* Free it. */
3843 const uint16_t iNext = paUsers[i].iNext;
3844 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3845 paUsers[i].iNext = pPool->iUserFreeHead;
3846 pPool->iUserFreeHead = i;
3847
3848 /* Next. */
3849 i = iNext;
3850 }
3851 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3852}
3853
3854
3855/**
3856 * Allocates a new physical cross reference extent.
3857 *
3858 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3859 * @param pVM The cross context VM structure.
3860 * @param piPhysExt Where to store the phys ext index.
3861 */
3862PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3863{
3864 PGM_LOCK_ASSERT_OWNER(pVM);
3865 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3866 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3867 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3868 {
3869 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3870 return NULL;
3871 }
3872 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3873 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3874 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3875 *piPhysExt = iPhysExt;
3876 return pPhysExt;
3877}
3878
3879
3880/**
3881 * Frees a physical cross reference extent.
3882 *
3883 * @param pVM The cross context VM structure.
3884 * @param iPhysExt The extent to free.
3885 */
3886void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3887{
3888 PGM_LOCK_ASSERT_OWNER(pVM);
3889 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3890 Assert(iPhysExt < pPool->cMaxPhysExts);
3891 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3892 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3893 {
3894 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3895 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3896 }
3897 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3898 pPool->iPhysExtFreeHead = iPhysExt;
3899}
3900
3901
3902/**
3903 * Frees a whole list of physical cross reference extents.
3904 *
3905 * @param pVM The cross context VM structure.
3906 * @param iPhysExt The first extent of the list to free.
3907 */
3908void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3909{
3910 PGM_LOCK_ASSERT_OWNER(pVM);
3911 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3912
3913 const uint16_t iPhysExtStart = iPhysExt;
3914 PPGMPOOLPHYSEXT pPhysExt;
3915 do
3916 {
3917 Assert(iPhysExt < pPool->cMaxPhysExts);
3918 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3919 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3920 {
3921 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3922 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3923 }
3924
3925 /* next */
3926 iPhysExt = pPhysExt->iNext;
3927 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3928
3929 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3930 pPool->iPhysExtFreeHead = iPhysExtStart;
3931}
3932
3933
3934/**
3935 * Insert a reference into a list of physical cross reference extents.
3936 *
3937 * @returns The new tracking data for PGMPAGE.
3938 *
3939 * @param pVM The cross context VM structure.
3940 * @param iPhysExt The physical extent index of the list head.
3941 * @param iShwPT The shadow page table index.
3942 * @param iPte Page table entry
3944 */
3945static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3946{
3947 PGM_LOCK_ASSERT_OWNER(pVM);
3948 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3949 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3950
3951 /*
3952 * Special common cases.
3953 */
3954 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3955 {
3956 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3957 paPhysExts[iPhysExt].apte[1] = iPte;
3958 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3959 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3960 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3961 }
3962 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3963 {
3964 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3965 paPhysExts[iPhysExt].apte[2] = iPte;
3966 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3967 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3968 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3969 }
3970 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3971
3972 /*
3973 * General treatment.
3974 */
3975 const uint16_t iPhysExtStart = iPhysExt;
3976 unsigned cMax = 15;
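 /* Walk at most 15 extents looking for a free slot; if none is found within
 that many nodes, the whole list is released and the page falls back to the
 overflowed state (PGMPOOL_TD_IDX_OVERFLOWED), leaving cleanup to the slow
 scan path. */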
3977 for (;;)
3978 {
3979 Assert(iPhysExt < pPool->cMaxPhysExts);
3980 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3981 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3982 {
3983 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3984 paPhysExts[iPhysExt].apte[i] = iPte;
3985 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3986 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3987 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3988 }
3989 if (!--cMax)
3990 {
3991 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
3992 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3993 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3994 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3995 }
3996
3997 /* advance */
3998 iPhysExt = paPhysExts[iPhysExt].iNext;
3999 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4000 break;
4001 }
4002
4003 /*
4004 * Add another extent to the list.
4005 */
4006 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4007 if (!pNew)
4008 {
4009 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4010 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4011 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4012 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4013 }
4014 pNew->iNext = iPhysExtStart;
4015 pNew->aidx[0] = iShwPT;
4016 pNew->apte[0] = iPte;
4017 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4018 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4019}
4020
4021
4022/**
4023 * Adds a reference to a guest physical page where extents are in use.
4024 *
4025 * @returns The new tracking data for PGMPAGE.
4026 *
4027 * @param pVM The cross context VM structure.
4028 * @param pPhysPage Pointer to the aPages entry in the ram range.
4029 * @param u16 The ram range flags (top 16-bits).
4030 * @param iShwPT The shadow page table index.
4031 * @param iPte Page table entry
4032 */
4033uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4034{
4035 PGM_LOCK_VOID(pVM);
4036 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4037 {
4038 /*
4039 * Convert to extent list.
4040 */
4041 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4042 uint16_t iPhysExt;
4043 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4044 if (pPhysExt)
4045 {
4046 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4047 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4048 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4049 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4050 pPhysExt->aidx[1] = iShwPT;
4051 pPhysExt->apte[1] = iPte;
4052 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4053 }
4054 else
4055 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4056 }
4057 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4058 {
4059 /*
4060 * Insert into the extent list.
4061 */
4062 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4063 }
4064 else
4065 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4066 PGM_UNLOCK(pVM);
4067 return u16;
4068}
4069
4070
4071/**
4072 * Clear references to guest physical memory.
4073 *
4074 * @param pPool The pool.
4075 * @param pPage The page.
4076 * @param pPhysPage Pointer to the aPages entry in the ram range.
4077 * @param iPte Shadow PTE index
4078 */
4079void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4080{
4081 PVMCC pVM = pPool->CTX_SUFF(pVM);
4082 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4083 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4084
4085 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4086 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4087 {
4088 PGM_LOCK_VOID(pVM);
4089
4090 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4091 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4092 do
4093 {
4094 Assert(iPhysExt < pPool->cMaxPhysExts);
4095
4096 /*
4097 * Look for the shadow page and check if it's all freed.
4098 */
4099 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4100 {
4101 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4102 && paPhysExts[iPhysExt].apte[i] == iPte)
4103 {
4104 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4105 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4106
4107 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4108 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4109 {
4110 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4111 PGM_UNLOCK(pVM);
4112 return;
4113 }
4114
4115 /* we can free the node. */
4116 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4117 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4118 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4119 {
4120 /* lonely node */
4121 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4122 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4123 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4124 }
4125 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4126 {
4127 /* head */
4128 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4129 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4130 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4131 }
4132 else
4133 {
4134 /* in list */
4135 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4136 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4137 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4138 }
4139 iPhysExt = iPhysExtNext;
4140 PGM_UNLOCK(pVM);
4141 return;
4142 }
4143 }
4144
4145 /* next */
4146 iPhysExtPrev = iPhysExt;
4147 iPhysExt = paPhysExts[iPhysExt].iNext;
4148 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4149
4150 PGM_UNLOCK(pVM);
4151 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4152 }
4153 else /* nothing to do */
4154 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4155}
4156
4157/**
4158 * Clear references to guest physical memory.
4159 *
4160 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4161 * physical address is assumed to be correct, so the linear search can be
4162 * skipped and we can assert at an earlier point.
4163 *
4164 * @param pPool The pool.
4165 * @param pPage The page.
4166 * @param HCPhys The host physical address corresponding to the guest page.
4167 * @param GCPhys The guest physical address corresponding to HCPhys.
4168 * @param iPte Shadow PTE index
4169 */
4170static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4171{
4172 /*
4173 * Lookup the page and check if it checks out before derefing it.
4174 */
4175 PVMCC pVM = pPool->CTX_SUFF(pVM);
4176 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4177 if (pPhysPage)
4178 {
4179 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4180#ifdef LOG_ENABLED
4181 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4182 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4183#endif
4184 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4185 {
4186 Assert(pPage->cPresent);
4187 Assert(pPool->cPresent);
4188 pPage->cPresent--;
4189 pPool->cPresent--;
4190 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4191 return;
4192 }
4193
4194 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4195 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4196 }
4197 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4198}
4199
4200
4201/**
4202 * Clear references to guest physical memory.
4203 *
4204 * @param pPool The pool.
4205 * @param pPage The page.
4206 * @param HCPhys The host physical address corresponding to the guest page.
4207 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4208 * @param iPte Shadow pte index
4209 */
4210void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4211{
4212 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4213
4214 /*
4215 * Try the hint first.
4216 */
4217 RTHCPHYS HCPhysHinted;
4218 PVMCC pVM = pPool->CTX_SUFF(pVM);
4219 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4220 if (pPhysPage)
4221 {
4222 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4223 Assert(HCPhysHinted);
4224 if (HCPhysHinted == HCPhys)
4225 {
4226 Assert(pPage->cPresent);
4227 Assert(pPool->cPresent);
4228 pPage->cPresent--;
4229 pPool->cPresent--;
4230 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4231 return;
4232 }
4233 }
4234 else
4235 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4236
4237 /*
4238 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4239 */
4240 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4241 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4242 while (pRam)
4243 {
4244 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4245 while (iPage-- > 0)
4246 {
4247 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4248 {
4249 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4250 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4251 Assert(pPage->cPresent);
4252 Assert(pPool->cPresent);
4253 pPage->cPresent--;
4254 pPool->cPresent--;
4255 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4256 return;
4257 }
4258 }
4259 pRam = pRam->CTX_SUFF(pNext);
4260 }
4261
4262 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4263}
4264
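/*
 * A note on the deref helpers below: the guest physical hint (or computed
 * address) is masked with an A20 gate aware mask, i.e. bit 20 is stripped
 * when the guest has the A20 gate disabled, so the page looked up matches
 * what the guest can actually address.
 */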
4265
4266/**
4267 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4268 *
4269 * @param pPool The pool.
4270 * @param pPage The page.
4271 * @param pShwPT The shadow page table (mapping of the page).
4272 * @param pGstPT The guest page table.
4273 */
4274DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4275{
4276 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4277 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4278 {
4279 const X86PGUINT uPte = pShwPT->a[i].u;
4280 Assert(!(uPte & RT_BIT_32(10)));
4281 if (uPte & X86_PTE_P)
4282 {
4283 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4284 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4285 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4286 if (!pPage->cPresent)
4287 break;
4288 }
4289 }
4290}
4291
4292
4293/**
4294 * Clear references to guest physical memory in a PAE / 32-bit page table.
4295 *
4296 * @param pPool The pool.
4297 * @param pPage The page.
4298 * @param pShwPT The shadow page table (mapping of the page).
4299 * @param pGstPT The guest page table (just a half one).
4300 */
4301DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4302{
4303 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4304 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4305 {
4306 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4307 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4308 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4309 {
4310 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4311 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4312 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4313 if (!pPage->cPresent)
4314 break;
4315 }
4316 }
4317}
4318
4319
4320/**
4321 * Clear references to guest physical memory in a PAE / PAE page table.
4322 *
4323 * @param pPool The pool.
4324 * @param pPage The page.
4325 * @param pShwPT The shadow page table (mapping of the page).
4326 * @param pGstPT The guest page table.
4327 */
4328DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4329{
4330 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4331 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4332 {
4333 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4334 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4335 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4336 {
4337 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4338 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4339 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4340 if (!pPage->cPresent)
4341 break;
4342 }
4343 }
4344}
4345
4346
4347/**
4348 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4349 *
4350 * @param pPool The pool.
4351 * @param pPage The page.
4352 * @param pShwPT The shadow page table (mapping of the page).
4353 */
4354DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4355{
4356 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4357 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4358 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4359 {
4360 const X86PGUINT uPte = pShwPT->a[i].u;
4361 Assert(!(uPte & RT_BIT_32(10)));
4362 if (uPte & X86_PTE_P)
4363 {
4364 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4365 i, uPte & X86_PTE_PG_MASK, GCPhys));
4366 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4367 if (!pPage->cPresent)
4368 break;
4369 }
4370 }
4371}
4372
4373
4374/**
4375 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4376 *
4377 * @param pPool The pool.
4378 * @param pPage The page.
4379 * @param pShwPT The shadow page table (mapping of the page).
4380 */
4381DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4382{
4383 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4384 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4385 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4386 {
4387 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4388 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4389 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4390 {
4391 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4392 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4393 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4394 if (!pPage->cPresent)
4395 break;
4396 }
4397 }
4398}
4399
4400
4401/**
4402 * Clear references to shadowed pages in an EPT page table.
4403 *
4404 * @param pPool The pool.
4405 * @param pPage The page.
4406 * @param pShwPT The shadow page table (mapping of the page).
4408 */
4409DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4410{
4411 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4412 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4413 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4414 {
4415 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4416 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4417 if (uPte & EPT_E_READ)
4418 {
4419 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4420 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4421 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4422 if (!pPage->cPresent)
4423 break;
4424 }
4425 }
4426}
4427
4428
4429/**
4430 * Clear references to shadowed pages in a 32 bits page directory.
4431 *
4432 * @param pPool The pool.
4433 * @param pPage The page.
4434 * @param pShwPD The shadow page directory (mapping of the page).
4435 */
4436DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4437{
4438 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4439 {
4440 X86PGUINT const uPde = pShwPD->a[i].u;
4441 if (uPde & X86_PDE_P)
4442 {
4443 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4444 if (pSubPage)
4445 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4446 else
4447 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4448 }
4449 }
4450}
4451
4452
4453/**
4454 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4455 *
4456 * @param pPool The pool.
4457 * @param pPage The page.
4458 * @param pShwPD The shadow page directory (mapping of the page).
4459 */
4460DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4461{
4462 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4463 {
4464 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4465 if (uPde & X86_PDE_P)
4466 {
4467#ifdef PGM_WITH_LARGE_PAGES
4468 if (uPde & X86_PDE_PS)
4469 {
4470 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4471 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4472 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4473 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4474 i);
4475 }
4476 else
4477#endif
4478 {
4479 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4480 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4481 if (pSubPage)
4482 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4483 else
4484 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4485 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4486 }
4487 }
4488 }
4489}
4490
4491
4492/**
4493 * Clear references to shadowed pages in a PAE page directory pointer table.
4494 *
4495 * @param pPool The pool.
4496 * @param pPage The page.
4497 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4498 */
4499DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4500{
4501 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4502 {
4503 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4504 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4505 if (uPdpe & X86_PDPE_P)
4506 {
4507 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4508 if (pSubPage)
4509 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4510 else
4511 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4512 }
4513 }
4514}
4515
4516
4517/**
4518 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4519 *
4520 * @param pPool The pool.
4521 * @param pPage The page.
4522 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4523 */
4524DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4525{
4526 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4527 {
4528 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4529 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4530 if (uPdpe & X86_PDPE_P)
4531 {
4532 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4533 if (pSubPage)
4534 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4535 else
4536 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4537 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4538 }
4539 }
4540}
4541
4542
4543/**
4544 * Clear references to shadowed pages in a 64-bit level 4 page table.
4545 *
4546 * @param pPool The pool.
4547 * @param pPage The page.
4548 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
4549 */
4550DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4551{
4552 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4553 {
4554 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4555 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4556 if (uPml4e & X86_PML4E_P)
4557 {
4558 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4559 if (pSubPage)
4560 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4561 else
4562 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4563 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4564 }
4565 }
4566}
4567
4568
4569/**
4570 * Clear references to shadowed pages in an EPT page directory.
4571 *
4572 * @param pPool The pool.
4573 * @param pPage The page.
4574 * @param pShwPD The shadow page directory (mapping of the page).
4575 */
4576DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4577{
4578 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4579 {
4580 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4581 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4582 if (uPde & EPT_E_READ)
4583 {
4584#ifdef PGM_WITH_LARGE_PAGES
4585 if (uPde & EPT_E_LEAF)
4586 {
4587 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4588 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4589 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4590 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4591 i);
4592 }
4593 else
4594#endif
4595 {
4596 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4597 if (pSubPage)
4598 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4599 else
4600 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4601 }
4602 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4603 }
4604 }
4605}
4606
4607
4608/**
4609 * Clear references to shadowed pages in an EPT page directory pointer table.
4610 *
4611 * @param pPool The pool.
4612 * @param pPage The page.
4613 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4614 */
4615DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4616{
4617 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4618 {
4619 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4620 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4621 if (uPdpe & EPT_E_READ)
4622 {
4623 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4624 if (pSubPage)
4625 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4626 else
4627 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4628 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4629 }
4630 }
4631}
4632
4633
4634/**
4635 * Clears all references made by this page.
4636 *
4637 * This includes other shadow pages and GC physical addresses.
4638 *
4639 * @param pPool The pool.
4640 * @param pPage The page.
4641 */
4642static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4643{
4644 /*
4645 * Map the shadow page and take action according to the page kind.
4646 */
4647 PVMCC pVM = pPool->CTX_SUFF(pVM);
4648 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
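 /*
 * Page table kinds drop their references to guest physical pages via the
 * pgmPoolTrackDerefPT* helpers, while directory style kinds (PD, PDPT,
 * PML4) mainly release the references they hold on child shadow pool pages
 * (shadowed large page PDEs being the exception, those reference guest
 * pages directly).
 */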
4649 switch (pPage->enmKind)
4650 {
4651 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4652 {
4653 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4654 void *pvGst;
4655 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4656 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4657 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4658 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4659 break;
4660 }
4661
4662 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4663 {
4664 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4665 void *pvGst;
4666 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4667 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4668 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4669 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4670 break;
4671 }
4672
4673 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4674 {
4675 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4676 void *pvGst;
4677 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4678 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4679 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4680 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4681 break;
4682 }
4683
4684 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4685 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4686 {
4687 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4688 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4689 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4690 break;
4691 }
4692
4693 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4694 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4695 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4696 {
4697 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4698 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4699 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4700 break;
4701 }
4702
4703 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4704 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4705 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4706 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4707 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4708 case PGMPOOLKIND_PAE_PD_PHYS:
4709 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4710 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4711 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4712 break;
4713
4714 case PGMPOOLKIND_32BIT_PD_PHYS:
4715 case PGMPOOLKIND_32BIT_PD:
4716 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4717 break;
4718
4719 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4720 case PGMPOOLKIND_PAE_PDPT:
4721 case PGMPOOLKIND_PAE_PDPT_PHYS:
4722 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4723 break;
4724
4725 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4726 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4727 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4728 break;
4729
4730 case PGMPOOLKIND_64BIT_PML4:
4731 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4732 break;
4733
4734 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4735 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4736 break;
4737
4738 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4739 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4740 break;
4741
4742 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4743 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4744 break;
4745
4746 default:
4747 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4748 }
4749
4750 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4751 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4752 ASMMemZeroPage(pvShw);
4753 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4754 pPage->fZeroed = true;
4755 Assert(!pPage->cPresent);
4756 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4757}
4758
4759
4760/**
4761 * Flushes a pool page.
4762 *
4763 * This moves the page to the free list after removing all user references to it.
4764 *
4765 * @returns VBox status code.
4766 * @retval VINF_SUCCESS on success.
4767 * @param pPool The pool.
4768 * @param pPage The shadow page.
4769 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4770 */
4771int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4772{
4773 PVMCC pVM = pPool->CTX_SUFF(pVM);
4774 bool fFlushRequired = false;
4775
4776 int rc = VINF_SUCCESS;
4777 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4778 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4779 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4780
4781 /*
4782 * Reject any attempts at flushing any of the special root pages (shall
4783 * not happen).
4784 */
4785 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4786 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4787 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4788 VINF_SUCCESS);
4789
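 /*
 * Teardown order below: reject locked root pages, undo any dirty page
 * tracking, clear every user table entry pointing at this page, drop the
 * references this page itself holds, evict it from the cache, stop write
 * monitoring, and finally put the page back on the free list.
 */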
4790 PGM_LOCK_VOID(pVM);
4791
4792 /*
4793 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4794 */
4795 if (pgmPoolIsPageLocked(pPage))
4796 {
4797 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4798 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4799 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4800 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4801 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4802 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4803 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4804 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4805 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4806 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4807 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4808 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4809 PGM_UNLOCK(pVM);
4810 return VINF_SUCCESS;
4811 }
4812
4813 /*
4814 * Mark the page as being in need of an ASMMemZeroPage().
4815 */
4816 pPage->fZeroed = false;
4817
4818#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4819 if (pPage->fDirty)
4820 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4821#endif
4822
4823 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4824 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4825 fFlushRequired = true;
4826
4827 /*
4828 * Clear the page.
4829 */
4830 pgmPoolTrackClearPageUsers(pPool, pPage);
4831 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4832 pgmPoolTrackDeref(pPool, pPage);
4833 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4834
4835 /*
4836 * Flush it from the cache.
4837 */
4838 pgmPoolCacheFlushPage(pPool, pPage);
4839
4840 /*
4841 * Deregister the monitoring.
4842 */
4843 if (pPage->fMonitored)
4844 rc = pgmPoolMonitorFlush(pPool, pPage);
4845
4846 /*
4847 * Free the page.
4848 */
4849 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4850 pPage->iNext = pPool->iFreeHead;
4851 pPool->iFreeHead = pPage->idx;
4852 pPage->enmKind = PGMPOOLKIND_FREE;
4853 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4854 pPage->GCPhys = NIL_RTGCPHYS;
4855 pPage->fReusedFlushPending = false;
4856
4857 pPool->cUsedPages--;
4858
4859 /* Flush the TLBs of all VCPUs if required. */
4860 if ( fFlushRequired
4861 && fFlush)
4862 {
4863 PGM_INVL_ALL_VCPU_TLBS(pVM);
4864 }
4865
4866 PGM_UNLOCK(pVM);
4867 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4868 return rc;
4869}
4870
4871
4872/**
4873 * Frees a usage of a pool page.
4874 *
4875 * The caller is responsible for updating the user table so that it no longer
4876 * references the shadow page.
4877 *
4878 * @param pPool The pool.
4879 * @param pPage The shadow page.
4880 * @param iUser The shadow page pool index of the user table.
4881 * NIL_PGMPOOL_IDX for root pages.
4882 * @param iUserTable The index into the user table (shadowed). Ignored if
4883 * root page.
4884 */
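/*
 * A minimal calling sketch with hypothetical values (pShwPt, iShwPd and the
 * entry index are placeholders, not names from this file): releasing a shadow
 * page table referenced from entry 0x1ff of the shadow page directory whose
 * pool index is iShwPd.  Root pages pass NIL_PGMPOOL_IDX as iUser, in which
 * case the table index is ignored.
 *
 *      pgmPoolFreeByPage(pPool, pShwPt, iShwPd, 0x1ff);
 */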
4885void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4886{
4887 PVMCC pVM = pPool->CTX_SUFF(pVM);
4888
4889 STAM_PROFILE_START(&pPool->StatFree, a);
4890 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4891 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4892 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4893
4894 PGM_LOCK_VOID(pVM);
4895 if (iUser != NIL_PGMPOOL_IDX)
4896 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4897 if (!pPage->fCached)
4898 pgmPoolFlushPage(pPool, pPage);
4899 PGM_UNLOCK(pVM);
4900 STAM_PROFILE_STOP(&pPool->StatFree, a);
4901}
4902
4903
4904/**
4905 * Makes one or more free pages available.
4906 *
4907 * @returns VBox status code.
4908 * @retval VINF_SUCCESS on success.
4909 *
4910 * @param pPool The pool.
4911 * @param enmKind The page table kind.
4912 * @param iUser The user of the page.
4913 */
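/*
 * In short: grow the pool (via the ring-3 or ring-0 worker) while cCurPages is
 * still below cMaxPages, and only fall back to evicting a cached page via
 * pgmPoolCacheFreeOne() once the pool is fully grown or growing did not yield
 * a free page.
 */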
4914static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4915{
4916 PVMCC pVM = pPool->CTX_SUFF(pVM);
4917 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4918 NOREF(enmKind);
4919
4920 /*
4921 * If the pool isn't fully grown yet, expand it.
4922 */
4923 if (pPool->cCurPages < pPool->cMaxPages)
4924 {
4925 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4926#ifdef IN_RING3
4927 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4928#else
4929 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4930#endif
4931 if (RT_FAILURE(rc))
4932 return rc;
4933 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4934 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4935 return VINF_SUCCESS;
4936 }
4937
4938 /*
4939 * Free one cached page.
4940 */
4941 return pgmPoolCacheFreeOne(pPool, iUser);
4942}
4943
4944
4945/**
4946 * Allocates a page from the pool.
4947 *
4948 * This page may actually be a cached page and not in need of any processing
4949 * on the caller's part.
4950 *
4951 * @returns VBox status code.
4952 * @retval VINF_SUCCESS if a NEW page was allocated.
4953 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4954 *
4955 * @param pVM The cross context VM structure.
4956 * @param GCPhys The GC physical address of the page we're going to shadow.
4957 * For 4MB and 2MB PD entries, it's the first address the
4958 * shadow PT is covering.
4959 * @param enmKind The kind of mapping.
4960 * @param enmAccess Access type for the mapping (only relevant for big pages).
4961 * @param fA20Enabled Whether the A20 gate is enabled or not.
4962 * @param iUser The shadow page pool index of the user table. Root
4963 * pages should pass NIL_PGMPOOL_IDX.
4964 * @param iUserTable The index into the user table (shadowed). Ignored for
4965 * root pages (iUser == NIL_PGMPOOL_IDX).
4966 * @param fLockPage Whether to lock the page.
4967 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4968 */
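/*
 * A minimal calling sketch with hypothetical values (GCPhysGst, pShwPde and
 * iPdIdx are placeholders, not names from this file): shadowing a guest 32-bit
 * page table with a PAE shadow PT, unlocked, tracked under its parent shadow PD.
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAlloc(pVM, GCPhysGst, PGMPOOLKIND_PAE_PT_FOR_32BIT_PT,
 *                            PGMPOOLACCESS_DONTCARE, true, pShwPde->idx, iPdIdx,
 *                            false, &pShwPage);
 *      // rc == VINF_PGM_CACHED_PAGE means the shadow PT contents are already valid;
 *      // rc == VINF_SUCCESS means a freshly zeroed page was handed out.
 */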
4969int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4970 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4971{
4972 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4973 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4974 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4975 *ppPage = NULL;
4976 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4977 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4978 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4979
4980 PGM_LOCK_VOID(pVM);
4981
4982 if (pPool->fCacheEnabled)
4983 {
4984 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4985 if (RT_SUCCESS(rc2))
4986 {
4987 if (fLockPage)
4988 pgmPoolLockPage(pPool, *ppPage);
4989 PGM_UNLOCK(pVM);
4990 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4991 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4992 return rc2;
4993 }
4994 }
4995
4996 /*
4997 * Allocate a new one.
4998 */
4999 int rc = VINF_SUCCESS;
5000 uint16_t iNew = pPool->iFreeHead;
5001 if (iNew == NIL_PGMPOOL_IDX)
5002 {
5003 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5004 if (RT_FAILURE(rc))
5005 {
5006 PGM_UNLOCK(pVM);
5007 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5008 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5009 return rc;
5010 }
5011 iNew = pPool->iFreeHead;
5012 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5013 }
5014
5015 /* unlink the free head */
5016 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5017 pPool->iFreeHead = pPage->iNext;
5018 pPage->iNext = NIL_PGMPOOL_IDX;
5019
5020 /*
5021 * Initialize it.
5022 */
5023 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5024 pPage->enmKind = enmKind;
5025 pPage->enmAccess = enmAccess;
5026 pPage->GCPhys = GCPhys;
5027 pPage->fA20Enabled = fA20Enabled;
5028 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5029 pPage->fMonitored = false;
5030 pPage->fCached = false;
5031 pPage->fDirty = false;
5032 pPage->fReusedFlushPending = false;
5033 pPage->cModifications = 0;
5034 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5035 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5036 pPage->cPresent = 0;
5037 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5038 pPage->idxDirtyEntry = 0;
5039 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5040 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5041 pPage->cLastAccessHandler = 0;
5042 pPage->cLocked = 0;
5043# ifdef VBOX_STRICT
5044 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5045# endif
5046
5047 /*
5048 * Insert into the tracking and cache. If this fails, free the page.
5049 */
5050 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5051 if (RT_FAILURE(rc3))
5052 {
5053 pPool->cUsedPages--;
5054 pPage->enmKind = PGMPOOLKIND_FREE;
5055 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5056 pPage->GCPhys = NIL_RTGCPHYS;
5057 pPage->iNext = pPool->iFreeHead;
5058 pPool->iFreeHead = pPage->idx;
5059 PGM_UNLOCK(pVM);
5060 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5061 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5062 return rc3;
5063 }
5064
5065 /*
5066 * Commit the allocation, clear the page and return.
5067 */
5068#ifdef VBOX_WITH_STATISTICS
5069 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5070 pPool->cUsedPagesHigh = pPool->cUsedPages;
5071#endif
5072
5073 if (!pPage->fZeroed)
5074 {
5075 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5076 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5077 ASMMemZeroPage(pv);
5078 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5079 }
5080
5081 *ppPage = pPage;
5082 if (fLockPage)
5083 pgmPoolLockPage(pPool, pPage);
5084 PGM_UNLOCK(pVM);
5085 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5086 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5087 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5088 return rc;
5089}
5090
5091
5092/**
5093 * Frees a usage of a pool page.
5094 *
5095 * @param pVM The cross context VM structure.
5096 * @param HCPhys The HC physical address of the shadow page.
5097 * @param iUser The shadow page pool index of the user table.
5098 * NIL_PGMPOOL_IDX if root page.
5099 * @param iUserTable The index into the user table (shadowed). Ignored if
5100 * root page.
5101 */
5102void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5103{
5104 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5105 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5106 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5107}
5108
5109
5110/**
5111 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5112 *
5113 * @returns Pointer to the shadow page structure.
5114 * @param pPool The pool.
5115 * @param HCPhys The HC physical address of the shadow page.
5116 */
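/*
 * Note: the lookup below keys the AVL tree on the page-aligned host physical
 * address (HCPhys masked with X86_PTE_PAE_PG_MASK), so callers may pass an
 * address with the page-offset bits set without affecting the result.
 */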
5117PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5118{
5119 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5120
5121 /*
5122 * Look up the page.
5123 */
5124 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5125
5126 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5127 return pPage;
5128}
5129
5130
5131/**
5132 * Internal worker for finding a page for debugging purposes, no assertions.
5133 *
5134 * @returns Pointer to the shadow page structure. NULL if not found.
5135 * @param pPool The pool.
5136 * @param HCPhys The HC physical address of the shadow page.
5137 */
5138PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5139{
5140 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5141 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5142}
5143
5144
5145/**
5146 * Internal worker for PGM_HCPHYS_2_PTR.
5147 *
5148 * @returns VBox status code.
5149 * @param pVM The cross context VM structure.
5150 * @param HCPhys The HC physical address of the shadow page.
5151 * @param ppv Where to return the address.
5152 */
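/*
 * A minimal calling sketch (pvShw is a placeholder name): resolving the shadow
 * page backing a given host physical address; the page offset of HCPhys is
 * preserved in the returned pointer.
 *
 *      void *pvShw;
 *      int rc = pgmPoolHCPhys2Ptr(pVM, HCPhys, &pvShw);
 *      AssertRCReturn(rc, rc);
 */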
5153int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5154{
5155 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5156 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5157 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5158 VERR_PGM_POOL_GET_PAGE_FAILED);
5159 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5160 return VINF_SUCCESS;
5161}
5162
5163#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5164
5165/**
5166 * Flushes the specified page if present.
5167 *
5168 * @param pVM The cross context VM structure.
5169 * @param GCPhys Guest physical address of the page to flush.
5170 */
5171void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5172{
5173 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5174
5175 VM_ASSERT_EMT(pVM);
5176
5177 /*
5178 * Look up the GCPhys in the hash.
5179 */
5180 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5181 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5182 if (i == NIL_PGMPOOL_IDX)
5183 return;
5184
5185 do
5186 {
5187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
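        /* Note: GCPhys was page-aligned above, so with unsigned arithmetic the
           difference below is < PAGE_SIZE exactly when pPage->GCPhys lies within
           that guest page; anything below GCPhys wraps around and fails the test. */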
5188 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5189 {
5190 switch (pPage->enmKind)
5191 {
5192 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5193 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5194 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5195 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5196 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5197 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5198 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5199 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5200 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5201 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5202 case PGMPOOLKIND_64BIT_PML4:
5203 case PGMPOOLKIND_32BIT_PD:
5204 case PGMPOOLKIND_PAE_PDPT:
5205 {
5206 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5207# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5208 if (pPage->fDirty)
5209 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5210 else
5211# endif
5212 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5213 Assert(!pgmPoolIsPageLocked(pPage));
5214 pgmPoolMonitorChainFlush(pPool, pPage);
5215 return;
5216 }
5217
5218 /* ignore, no monitoring. */
5219 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5220 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5221 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5222 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5223 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5224 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5225 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5226 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5227 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5228 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5229 case PGMPOOLKIND_ROOT_NESTED:
5230 case PGMPOOLKIND_PAE_PD_PHYS:
5231 case PGMPOOLKIND_PAE_PDPT_PHYS:
5232 case PGMPOOLKIND_32BIT_PD_PHYS:
5233 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5234 break;
5235
5236 default:
5237 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5238 }
5239 }
5240
5241 /* next */
5242 i = pPage->iNext;
5243 } while (i != NIL_PGMPOOL_IDX);
5244 return;
5245}
5246
5247
5248/**
5249 * Reset CPU on hot plugging.
5250 *
5251 * @param pVM The cross context VM structure.
5252 * @param pVCpu The cross context virtual CPU structure.
5253 */
5254void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5255{
5256 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5257
5258 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5259 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5260 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5261}
5262
5263
5264/**
5265 * Flushes the entire cache.
5266 *
5267 * It will assert a global CR3 flush (FF) and assumes that the caller is aware
5268 * of this and will execute the CR3 flush.
5269 *
5270 * @param pVM The cross context VM structure.
5271 */
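/*
 * Rough order of operations below: exit shadow mode on every VCPU, rebuild the
 * free list from all pool pages (flushing monitored ones), reset the user and
 * physical-extent records, drop all GCPhys tracking links in the RAM ranges,
 * clear the modified list, the GCPhys hash, the age list and (when enabled) the
 * dirty page tracking, then re-enter shadow mode and raise the Sync-CR3 and
 * TLB-flush force-action flags on every VCPU.
 */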
5272void pgmR3PoolReset(PVM pVM)
5273{
5274 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5275
5276 PGM_LOCK_ASSERT_OWNER(pVM);
5277 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5278 LogFlow(("pgmR3PoolReset:\n"));
5279
5280 /*
5281 * If there are no pages in the pool, there is nothing to do.
5282 */
5283 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5284 {
5285 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5286 return;
5287 }
5288
5289 /*
5290 * Exit the shadow mode since we're going to clear everything,
5291 * including the root page.
5292 */
5293 VMCC_FOR_EACH_VMCPU(pVM)
5294 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5295 VMCC_FOR_EACH_VMCPU_END(pVM);
5296
5297
5298 /*
5299 * Nuke the free list and reinsert all pages into it.
5300 */
5301 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5302 {
5303 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5304
5305 if (pPage->fMonitored)
5306 pgmPoolMonitorFlush(pPool, pPage);
5307 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5308 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5309 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5310 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5311 pPage->GCPhys = NIL_RTGCPHYS;
5312 pPage->enmKind = PGMPOOLKIND_FREE;
5313 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5314 Assert(pPage->idx == i);
5315 pPage->iNext = i + 1;
5316 pPage->fA20Enabled = true;
5317 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5318 pPage->fSeenNonGlobal = false;
5319 pPage->fMonitored = false;
5320 pPage->fDirty = false;
5321 pPage->fCached = false;
5322 pPage->fReusedFlushPending = false;
5323 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5324 pPage->cPresent = 0;
5325 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5326 pPage->cModifications = 0;
5327 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5328 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5329 pPage->idxDirtyEntry = 0;
5330 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5331 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5332 pPage->cLastAccessHandler = 0;
5333 pPage->cLocked = 0;
5334# ifdef VBOX_STRICT
5335 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5336# endif
5337 }
5338 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5339 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5340 pPool->cUsedPages = 0;
5341
5342 /*
5343 * Zap and reinitialize the user records.
5344 */
5345 pPool->cPresent = 0;
5346 pPool->iUserFreeHead = 0;
5347 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5348 const unsigned cMaxUsers = pPool->cMaxUsers;
5349 for (unsigned i = 0; i < cMaxUsers; i++)
5350 {
5351 paUsers[i].iNext = i + 1;
5352 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5353 paUsers[i].iUserTable = 0xfffffffe;
5354 }
5355 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5356
5357 /*
5358 * Clear all the GCPhys links and rebuild the phys ext free list.
5359 */
5360 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5361 pRam;
5362 pRam = pRam->CTX_SUFF(pNext))
5363 {
5364 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5365 while (iPage-- > 0)
5366 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5367 }
5368
5369 pPool->iPhysExtFreeHead = 0;
5370 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5371 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5372 for (unsigned i = 0; i < cMaxPhysExts; i++)
5373 {
5374 paPhysExts[i].iNext = i + 1;
5375 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5376 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5377 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5378 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5379 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5380 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5381 }
5382 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5383
5384 /*
5385 * Just zap the modified list.
5386 */
5387 pPool->cModifiedPages = 0;
5388 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5389
5390 /*
5391 * Clear the GCPhys hash and the age list.
5392 */
5393 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5394 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5395 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5396 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5397
5398# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5399 /* Clear all dirty pages. */
5400 pPool->idxFreeDirtyPage = 0;
5401 pPool->cDirtyPages = 0;
5402 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5403 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5404# endif
5405
5406 /*
5407 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5408 */
5409 VMCC_FOR_EACH_VMCPU(pVM)
5410 {
5411 /*
5412 * Re-enter the shadowing mode and assert Sync CR3 FF.
5413 */
5414 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5415 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5416 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5417 }
5418 VMCC_FOR_EACH_VMCPU_END(pVM);
5419
5420 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5421}
5422
5423#endif /* IN_RING3 */
5424
5425#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5426/**
5427 * Stringifies a PGMPOOLKIND value.
5428 */
5429static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5430{
5431 switch ((PGMPOOLKIND)enmKind)
5432 {
5433 case PGMPOOLKIND_INVALID:
5434 return "PGMPOOLKIND_INVALID";
5435 case PGMPOOLKIND_FREE:
5436 return "PGMPOOLKIND_FREE";
5437 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5438 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5439 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5440 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5441 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5442 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5443 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5444 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5445 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5446 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5447 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5448 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5449 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5450 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5451 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5452 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5453 case PGMPOOLKIND_32BIT_PD:
5454 return "PGMPOOLKIND_32BIT_PD";
5455 case PGMPOOLKIND_32BIT_PD_PHYS:
5456 return "PGMPOOLKIND_32BIT_PD_PHYS";
5457 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5458 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5459 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5460 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5461 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5462 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5463 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5464 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5465 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5466 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5467 case PGMPOOLKIND_PAE_PD_PHYS:
5468 return "PGMPOOLKIND_PAE_PD_PHYS";
5469 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5470 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5471 case PGMPOOLKIND_PAE_PDPT:
5472 return "PGMPOOLKIND_PAE_PDPT";
5473 case PGMPOOLKIND_PAE_PDPT_PHYS:
5474 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5475 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5476 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5477 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5478 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5480 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5481 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5482 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5483 case PGMPOOLKIND_64BIT_PML4:
5484 return "PGMPOOLKIND_64BIT_PML4";
5485 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5486 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5487 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5488 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5489 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5490 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5491 case PGMPOOLKIND_ROOT_NESTED:
5492 return "PGMPOOLKIND_ROOT_NESTED";
5493 }
5494 return "Unknown kind!";
5495}
5496#endif /* LOG_ENABLED || VBOX_STRICT */
5497