VirtualBox

Source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ r93635

Last change on this file: r93635, checked in by vboxsync, 3 years ago

VMM/PGM,VMM/PDM,VGA: Consolidate the user parameters of the physical access handlers into a single uint64_t value that shouldn't be a pointer, at least not for ring-0 callbacks. Special hack for devices where it's translated from a ring-0 device instance index into a current context PPDMDEVINS (not really tested yet). bugref:10094

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 208.2 KB
1/* $Id: PGMAllPool.cpp 93635 2022-02-07 10:43:45Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Internal Functions *
43*********************************************************************************************************************************/
44RT_C_DECLS_BEGIN
45#if 0 /* unused */
46DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
47DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
48#endif /* unused */
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68#if 0 /* unused */
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87#endif /* unused */
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @param pPool The pool.
94 * @param pPage A page in the chain.
95 */
96void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 for (;;)
118 {
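        /* Read the next link before flushing; pgmPoolFlushPage unlinks the page from the monitored chain. */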
119 idx = pPage->iMonitoredNext;
120 Assert(idx != pPage->idx);
121 if (pPage->idx >= PGMPOOL_IDX_FIRST)
122 {
123 int rc2 = pgmPoolFlushPage(pPool, pPage);
124 AssertRC(rc2);
125 }
126 /* next */
127 if (idx == NIL_PGMPOOL_IDX)
128 break;
129 pPage = &pPool->aPages[idx];
130 }
131}
132
133
134/**
135 * Wrapper for reading the guest entry being modified, using the mapping or the physical address depending on the context.
136 *
137 * @returns VBox status code suitable for scheduling.
138 * @param pVM The cross context VM structure.
139 * @param pvDst Destination address
140 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
141 * on the context (e.g. \#PF in R0 & RC).
142 * @param GCPhysSrc The source guest physical address.
143 * @param cb Size of data to read
144 */
145DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
146{
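    /* Both paths align the source down to the entry size (cb is a power of two here), so a partially written entry is read whole. */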
147#if defined(IN_RING3)
148 NOREF(pVM); NOREF(GCPhysSrc);
149 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
150 return VINF_SUCCESS;
151#else
152 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
153 NOREF(pvSrc);
154 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
155#endif
156}
157
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu The cross context virtual CPU structure.
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
170 * depending on the context (e.g. \#PF in R0 & RC).
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
172 */
173static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
174 void const *pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
178 PVMCC pVM = pPool->CTX_SUFF(pVM);
179 NOREF(pVCpu);
180
181 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
182 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
183
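    /* Walk the monitored chain: every shadow page that mirrors this guest page gets its affected entries updated below. */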
184 for (;;)
185 {
186 union
187 {
188 void *pv;
189 PX86PT pPT;
190 PPGMSHWPTPAE pPTPae;
191 PX86PD pPD;
192 PX86PDPAE pPDPae;
193 PX86PDPT pPDPT;
194 PX86PML4 pPML4;
195 } uShw;
196
197 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
198 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
210 if (uPde & X86_PTE_P)
211 {
212 X86PTE GstPte;
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
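            /* A 32-bit guest PD is shadowed by four PAE PDs: each shadow PD covers 256 guest PDEs, and each guest PDE expands to two PAE PDEs. */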
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
265 if (uPde & X86_PDE_P)
266 {
267 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
268 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
269 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
270 }
271
272 /* paranoia / a bit assumptive. */
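                    /* A misaligned write may spill into the entries for the next guest PDE; clear those too if present. */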
273 if ( (off & 3)
274 && (off & 3) + cbWrite > 4)
275 {
276 const unsigned iShw2 = iShw + 2 + i;
277 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
278 {
279 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
280 if (uPde2 & X86_PDE_P)
281 {
282 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
283 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
284 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
285 }
286 }
287 }
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
294 {
295 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
296 const unsigned iShw = off / sizeof(X86PTEPAE);
297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
298 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
299 {
300 X86PTEPAE GstPte;
301 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
302 AssertRC(rc);
303
304 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
305 pgmPoolTracDerefGCPhysHint(pPool, pPage,
306 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
307 GstPte.u & X86_PTE_PAE_PG_MASK,
308 iShw);
309 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
310 }
311
312 /* paranoia / a bit assumptive. */
313 if ( (off & 7)
314 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
315 {
316 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
317 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
318
319 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
320 {
321 X86PTEPAE GstPte;
322 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
323 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
324 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
325 AssertRC(rc);
326 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
327 pgmPoolTracDerefGCPhysHint(pPool, pPage,
328 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
329 GstPte.u & X86_PTE_PAE_PG_MASK,
330 iShw2);
331 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
332 }
333 }
334 break;
335 }
336
337 case PGMPOOLKIND_32BIT_PD:
338 {
339 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
340 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
341
342 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
343 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
344 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
345 if (uPde & X86_PDE_P)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
348 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
349 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
350 }
351
352 /* paranoia / a bit assumptive. */
353 if ( (off & 3)
354 && (off & 3) + cbWrite > sizeof(X86PTE))
355 {
356 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
357 if ( iShw2 != iShw
358 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
359 {
360 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
361 if (uPde2 & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
364 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
366 }
367 }
368 }
369#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
370 if ( uShw.pPD->a[iShw].n.u1Present
371 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
372 {
373 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
374 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
375 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
376 }
377#endif
378 break;
379 }
380
381 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
382 {
383 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
384 const unsigned iShw = off / sizeof(X86PDEPAE);
385 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
386
387 /*
388 * Causes trouble when the guest uses a PDE to refer to the whole page table level
389 * structure. (Invalidate here; faults later on when it tries to change the page
390 * table entries -> recheck; probably only applies to the RC case.)
391 */
392 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
393 if (uPde & X86_PDE_P)
394 {
395 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
396 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
397 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
398 }
399
400 /* paranoia / a bit assumptive. */
401 if ( (off & 7)
402 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
403 {
404 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
405 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
406
407 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
408 if (uPde2 & X86_PDE_P)
409 {
410 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
411 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
412 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
413 }
414 }
415 break;
416 }
417
418 case PGMPOOLKIND_PAE_PDPT:
419 {
420 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
421 /*
422 * Hopefully this doesn't happen very often:
423 * - touching unused parts of the page
424 * - messing with the bits of pd pointers without changing the physical address
425 */
426 /* PDPT roots are not page aligned; 32 bytes only! */
427 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
428
429 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
430 const unsigned iShw = offPdpt / sizeof(X86PDPE);
431 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
432 {
433 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
434 if (uPdpe & X86_PDPE_P)
435 {
436 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
437 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
438 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
439 }
440
441 /* paranoia / a bit assumptive. */
442 if ( (offPdpt & 7)
443 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
444 {
445 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
446 if ( iShw2 != iShw
447 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
448 {
449 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
450 if (uPdpe2 & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
453 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
455 }
456 }
457 }
458 }
459 break;
460 }
461
462 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
463 {
464 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
465 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
466 const unsigned iShw = off / sizeof(X86PDEPAE);
467 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
468 if (uPde & X86_PDE_P)
469 {
470 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
471 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
482 if (uPde2 & X86_PDE_P)
483 {
484 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
485 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
486 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
487 }
488 }
489 break;
490 }
491
492 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
493 {
494 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
495 /*
496 * Hopefully this doesn't happen very often:
497 * - messing with the bits of pd pointers without changing the physical address
498 */
499 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
500 const unsigned iShw = off / sizeof(X86PDPE);
501 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
502 if (uPdpe & X86_PDPE_P)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
505 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
506 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
507 }
508 /* paranoia / a bit assumptive. */
509 if ( (off & 7)
510 && (off & 7) + cbWrite > sizeof(X86PDPE))
511 {
512 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
513 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
514 if (uPdpe2 & X86_PDPE_P)
515 {
516 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
517 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
518 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
519 }
520 }
521 break;
522 }
523
524 case PGMPOOLKIND_64BIT_PML4:
525 {
526 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
527 /*
528 * Hopefully this doesn't happen very often:
529 * - messing with the bits of pd pointers without changing the physical address
530 */
531 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
532 const unsigned iShw = off / sizeof(X86PDPE);
533 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
534 if (uPml4e & X86_PML4E_P)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
537 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
538 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
539 }
540 /* paranoia / a bit assumptive. */
541 if ( (off & 7)
542 && (off & 7) + cbWrite > sizeof(X86PDPE))
543 {
544 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
545 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
546 if (uPml4e2 & X86_PML4E_P)
547 {
548 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
549 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
550 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 default:
557 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
558 }
559 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
560
561 /* next */
562 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
563 return;
564 pPage = &pPool->aPages[pPage->iMonitoredNext];
565 }
566}
567
568#ifndef IN_RING3
569
570/**
571 * Checks if an access could be a fork operation in progress.
572 *
573 * Meaning that the guest is setting up the parent process for Copy-On-Write.
574 *
575 * @returns true if it's likely that we're forking, otherwise false.
576 * @param pPool The pool.
577 * @param pDis The disassembled instruction.
578 * @param offFault The access offset.
579 */
580DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
581{
582 /*
583 * i386 Linux uses btr to clear X86_PTE_RW.
584 * The functions involved are (2.6.16 source inspection):
585 * clear_bit
586 * ptep_set_wrprotect
587 * copy_one_pte
588 * copy_pte_range
589 * copy_pmd_range
590 * copy_pud_range
591 * copy_page_range
592 * dup_mmap
593 * dup_mm
594 * copy_mm
595 * copy_process
596 * do_fork
597 */
598 if ( pDis->pCurInstr->uOpcode == OP_BTR
599 && !(offFault & 4)
600 /** @todo Validate that the bit index is X86_PTE_RW. */
601 )
602 {
603 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
604 return true;
605 }
606 return false;
607}
608
609
610/**
611 * Determine whether the page is likely to have been reused.
612 *
613 * @returns true if we consider the page as being reused for a different purpose.
614 * @returns false if we consider it to still be a paging page.
615 * @param pVM The cross context VM structure.
616 * @param pVCpu The cross context virtual CPU structure.
617 * @param pRegFrame Trap register frame.
618 * @param pDis The disassembly info for the faulting instruction.
619 * @param pvFault The fault address.
620 * @param pPage The pool page being accessed.
621 *
622 * @remark The REP prefix check is left to the caller because of STOSD/W.
623 */
624DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
625 PPGMPOOLPAGE pPage)
626{
627 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
628 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
629 if (pPage->cLocked)
630 {
631 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused because it's locked!\n", pvFault, pPage));
632 return false;
633 }
634
635 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
636 if ( HMHasPendingIrq(pVM)
637 && pRegFrame->rsp - pvFault < 32)
638 {
639 /* Fault caused by stack writes while trying to inject an interrupt event. */
640 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
641 return true;
642 }
643
644 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
645
646 /* Non-supervisor mode write means it's used for something else. */
647 if (CPUMGetGuestCPL(pVCpu) == 3)
648 return true;
649
650 switch (pDis->pCurInstr->uOpcode)
651 {
652 /* call implies the actual push of the return address faulted */
653 case OP_CALL:
654 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
655 return true;
656 case OP_PUSH:
657 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
658 return true;
659 case OP_PUSHF:
660 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
661 return true;
662 case OP_PUSHA:
663 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
664 return true;
665 case OP_FXSAVE:
666 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
667 return true;
668 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
669 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
670 return true;
671 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
672 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
673 return true;
674 case OP_MOVSWD:
675 case OP_STOSWD:
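            /* A 64-bit REP STOSQ of 0x40 or more iterations looks like the page is being zeroed for reuse, not a page table update. */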
676 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
677 && pRegFrame->rcx >= 0x40
678 )
679 {
680 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
681
682 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
683 return true;
684 }
685 break;
686
687 default:
688 /*
689 * Anything having ESP on the left side means stack writes.
690 */
691 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
692 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
693 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
694 {
695 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
696 return true;
697 }
698 break;
699 }
700
701 /*
702 * Page table updates are very unlikely to cross page boundaries,
703 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
704 */
705 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
706 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
707 {
708 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
709 return true;
710 }
711
712 /*
713 * Nobody does an unaligned 8-byte write to a page table, right?
714 */
715 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
716 {
717 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
718 return true;
719 }
720
721 return false;
722}
723
724
725/**
726 * Flushes the page being accessed.
727 *
728 * @returns VBox status code suitable for scheduling.
729 * @param pVM The cross context VM structure.
730 * @param pVCpu The cross context virtual CPU structure.
731 * @param pPool The pool.
732 * @param pPage The pool page (head).
733 * @param pDis The disassembly of the write instruction.
734 * @param pRegFrame The trap register frame.
735 * @param GCPhysFault The fault address as guest physical address.
736 * @param pvFault The fault address.
737 * @todo VBOXSTRICTRC
738 */
739static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
740 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
741{
742 NOREF(pVM); NOREF(GCPhysFault);
743
744 /*
745 * First, do the flushing.
746 */
747 pgmPoolMonitorChainFlush(pPool, pPage);
748
749 /*
750 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
751 * Must do this in raw mode (!); XP boot will fail otherwise.
752 */
753 int rc = VINF_SUCCESS;
754 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
755 if (rc2 == VINF_SUCCESS)
756 { /* do nothing */ }
757 else if (rc2 == VINF_EM_RESCHEDULE)
758 {
759 rc = VBOXSTRICTRC_VAL(rc2);
760# ifndef IN_RING3
761 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
762# endif
763 }
764 else if (rc2 == VERR_EM_INTERPRETER)
765 {
766 rc = VINF_EM_RAW_EMULATE_INSTR;
767 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
768 }
769 else if (RT_FAILURE_NP(rc2))
770 rc = VBOXSTRICTRC_VAL(rc2);
771 else
772 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
773
774 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
775 return rc;
776}
777
778
779/**
780 * Handles the STOSD write accesses.
781 *
782 * @returns VBox status code suitable for scheduling.
783 * @param pVM The cross context VM structure.
784 * @param pPool The pool.
785 * @param pPage The pool page (head).
786 * @param pDis The disassembly of the write instruction.
787 * @param pRegFrame The trap register frame.
788 * @param GCPhysFault The fault address as guest physical address.
789 * @param pvFault The fault address.
790 */
791DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
792 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
793{
794 unsigned uIncrement = pDis->Param1.cb;
795 NOREF(pVM);
796
797 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
798 Assert(pRegFrame->rcx <= 0x20);
799
800# ifdef VBOX_STRICT
801 if (pDis->uOpMode == DISCPUMODE_32BIT)
802 Assert(uIncrement == 4);
803 else
804 Assert(uIncrement == 8);
805# endif
806
807 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
808
809 /*
810 * Increment the modification counter and insert it into the list
811 * of modified pages the first time.
812 */
813 if (!pPage->cModifications++)
814 pgmPoolMonitorModifiedInsert(pPool, pPage);
815
816 /*
817 * Execute REP STOSD.
818 *
819 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
820 * write situation, meaning that it's safe to write here.
821 */
822 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
823 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
824 while (pRegFrame->rcx)
825 {
826 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
827 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
828 pu32 += uIncrement;
829 GCPhysFault += uIncrement;
830 pRegFrame->rdi += uIncrement;
831 pRegFrame->rcx--;
832 }
833 pRegFrame->rip += pDis->cbInstr;
834
835 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
836 return VINF_SUCCESS;
837}
838
839
840/**
841 * Handles the simple write accesses.
842 *
843 * @returns VBox status code suitable for scheduling.
844 * @param pVM The cross context VM structure.
845 * @param pVCpu The cross context virtual CPU structure.
846 * @param pPool The pool.
847 * @param pPage The pool page (head).
848 * @param pDis The disassembly of the write instruction.
849 * @param pRegFrame The trap register frame.
850 * @param GCPhysFault The fault address as guest physical address.
851 * @param pvFault The fault address.
852 * @param pfReused Reused state (in/out)
853 */
854DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
855 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
856{
857 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
858 NOREF(pVM);
859 NOREF(pfReused); /* initialized by caller */
860
861 /*
862 * Increment the modification counter and insert it into the list
863 * of modified pages the first time.
864 */
865 if (!pPage->cModifications++)
866 pgmPoolMonitorModifiedInsert(pPool, pPage);
867
868 /*
869 * Clear all the pages. ASSUMES that pvFault is readable.
870 */
871 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
872 if (cbWrite <= 8)
873 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
874 else if (cbWrite <= 16)
875 {
876 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
877 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
878 }
879 else
880 {
881 Assert(cbWrite <= 32);
882 for (uint32_t off = 0; off < cbWrite; off += 8)
883 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
884 }
885
886 /*
887 * Interpret the instruction.
888 */
889 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc))
891 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
892 else if (rc == VERR_EM_INTERPRETER)
893 {
894 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
895 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
896 rc = VINF_EM_RAW_EMULATE_INSTR;
897 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
898 }
899
900# if 0 /* experimental code */
901 if (rc == VINF_SUCCESS)
902 {
903 switch (pPage->enmKind)
904 {
905 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
906 {
907 X86PTEPAE GstPte;
908 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
909 AssertRC(rc);
910
911 /* Check the new value written by the guest. If present and with a bogus physical address, then
912 * it's fairly safe to assume the guest is reusing the PT.
913 */
914 if (GstPte.n.u1Present)
915 {
916 RTHCPHYS HCPhys = -1;
917 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
918 if (rc != VINF_SUCCESS)
919 {
920 *pfReused = true;
921 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
922 }
923 }
924 break;
925 }
926 }
927 }
928# endif
929
930 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
931 return VBOXSTRICTRC_VAL(rc);
932}
933
934
935/**
936 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
937 * \#PF access handler callback for page table pages.}
938 *
939 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
940 */
941DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
942 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
943{
944 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
945 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
946 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
947 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
948 unsigned cMaxModifications;
949 bool fForcedFlush = false;
950 RT_NOREF_PV(uErrorCode);
951
952 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
953
954 PGM_LOCK_VOID(pVM);
955 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
956 {
957 /* Pool page changed while we were waiting for the lock; ignore. */
958 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
959 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
960 PGM_UNLOCK(pVM);
961 return VINF_SUCCESS;
962 }
963# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
964 if (pPage->fDirty)
965 {
966 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
967 PGM_UNLOCK(pVM);
968 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
969 }
970# endif
971
972# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
973 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
974 {
975 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
976 void *pvGst;
977 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
978 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
979 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
980 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
981 }
982# endif
983
984 /*
985 * Disassemble the faulting instruction.
986 */
987 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
988 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
989 if (RT_UNLIKELY(rc != VINF_SUCCESS))
990 {
991 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
992 PGM_UNLOCK(pVM);
993 return rc;
994 }
995
996 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
997
998 /*
999 * We should ALWAYS have the list head as user parameter. This
1000 * is because we use that page to record the changes.
1001 */
1002 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1003
1004# ifdef IN_RING0
1005 /* Maximum nr of modifications depends on the page type. */
1006 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1007 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1008 cMaxModifications = 4;
1009 else
1010 cMaxModifications = 24;
1011# else
1012 cMaxModifications = 48;
1013# endif
1014
1015 /*
1016 * Incremental page table updates should weigh more than random ones.
1017 * (Only applies when started from offset 0)
1018 */
1019 pVCpu->pgm.s.cPoolAccessHandler++;
1020 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1021 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1022 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1023 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1024 {
1025 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1026 Assert(pPage->cModifications < 32000);
1027 pPage->cModifications = pPage->cModifications * 2;
1028 pPage->GCPtrLastAccessHandlerFault = pvFault;
1029 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1030 if (pPage->cModifications >= cMaxModifications)
1031 {
1032 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1033 fForcedFlush = true;
1034 }
1035 }
1036
1037 if (pPage->cModifications >= cMaxModifications)
1038 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1039
1040 /*
1041 * Check if it's worth dealing with.
1042 */
1043 bool fReused = false;
1044 bool fNotReusedNotForking = false;
1045 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1046 || pgmPoolIsPageLocked(pPage)
1047 )
1048 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1049 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1050 {
1051 /*
1052 * Simple instructions, no REP prefix.
1053 */
1054 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1055 {
1056 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1057 if (fReused)
1058 goto flushPage;
1059
1060 /* A mov instruction to change the first page table entry will be remembered so we can detect
1061 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1062 */
1063 if ( rc == VINF_SUCCESS
1064 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1065 && pDis->pCurInstr->uOpcode == OP_MOV
1066 && (pvFault & PAGE_OFFSET_MASK) == 0)
1067 {
1068 pPage->GCPtrLastAccessHandlerFault = pvFault;
1069 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1070 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1071 /* Make sure we don't kick out a page too quickly. */
1072 if (pPage->cModifications > 8)
1073 pPage->cModifications = 2;
1074 }
1075 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1076 {
1077 /* ignore the 2nd write to this page table entry. */
1078 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1079 }
1080 else
1081 {
1082 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1083 pPage->GCPtrLastAccessHandlerRip = 0;
1084 }
1085
1086 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1087 PGM_UNLOCK(pVM);
1088 return rc;
1089 }
1090
1091 /*
1092 * Windows frequently does small memset() operations (netio test 4k+).
1093 * We have to deal with these or we'll kill the cache and performance.
1094 */
1095 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1096 && !pRegFrame->eflags.Bits.u1DF
1097 && pDis->uOpMode == pDis->uCpuMode
1098 && pDis->uAddrMode == pDis->uCpuMode)
1099 {
1100 bool fValidStosd = false;
1101
1102 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1103 && pDis->fPrefix == DISPREFIX_REP
1104 && pRegFrame->ecx <= 0x20
1105 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1106 && !((uintptr_t)pvFault & 3)
1107 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1108 )
1109 {
1110 fValidStosd = true;
1111 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1112 }
1113 else
1114 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1115 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1116 && pRegFrame->rcx <= 0x20
1117 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1118 && !((uintptr_t)pvFault & 7)
1119 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1120 )
1121 {
1122 fValidStosd = true;
1123 }
1124
1125 if (fValidStosd)
1126 {
1127 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1128 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1129 PGM_UNLOCK(pVM);
1130 return rc;
1131 }
1132 }
1133
1134 /* REP prefix, don't bother. */
1135 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1136 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1137 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1138 fNotReusedNotForking = true;
1139 }
1140
1141# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1142 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1143 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1144 */
1145 if ( pPage->cModifications >= cMaxModifications
1146 && !fForcedFlush
1147 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1148 && ( fNotReusedNotForking
1149 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1150 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1151 )
1152 )
1153 {
1154 Assert(!pgmPoolIsPageLocked(pPage));
1155 Assert(pPage->fDirty == false);
1156
1157 /* Flush any monitored duplicates as we will disable write protection. */
1158 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1159 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1160 {
1161 PPGMPOOLPAGE pPageHead = pPage;
1162
1163 /* Find the monitor head. */
1164 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1165 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1166
1167 while (pPageHead)
1168 {
1169 unsigned idxNext = pPageHead->iMonitoredNext;
1170
1171 if (pPageHead != pPage)
1172 {
1173 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1174 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1175 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1176 AssertRC(rc2);
1177 }
1178
1179 if (idxNext == NIL_PGMPOOL_IDX)
1180 break;
1181
1182 pPageHead = &pPool->aPages[idxNext];
1183 }
1184 }
1185
1186 /* The flushing above might fail for locked pages, so double check. */
1187 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1188 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1189 {
1190 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1191
1192 /* Temporarily allow write access to the page table again. */
1193 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1194 if (rc == VINF_SUCCESS)
1195 {
1196 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1197 AssertMsg(rc == VINF_SUCCESS
1198 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1199 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1200 || rc == VERR_PAGE_NOT_PRESENT,
1201 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1202# ifdef VBOX_STRICT
1203 pPage->GCPtrDirtyFault = pvFault;
1204# endif
1205
1206 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1207 PGM_UNLOCK(pVM);
1208 return rc;
1209 }
1210 }
1211 }
1212# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1213
1214 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1215flushPage:
1216 /*
1217 * Not worth it, so flush it.
1218 *
1219 * If we considered it to be reused, don't go back to ring-3
1220 * to emulate failed instructions since we usually cannot
1221 * interpret them. This may be a bit risky, in which case
1222 * the reuse detection must be fixed.
1223 */
1224 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1225 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1226 && fReused)
1227 {
1228 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1229 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1230 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1231 }
1232 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1233 PGM_UNLOCK(pVM);
1234 return rc;
1235}
1236
1237#endif /* !IN_RING3 */
1238
1239/**
1240 * @callback_method_impl{FNPGMPHYSHANDLER,
1241 * Access handler for shadowed page table pages.}
1242 *
1243 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1244 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1245 */
1246PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1247pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1248 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1249{
1250 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1251 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1252 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1253 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1254 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1255 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1256
1257 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1258
1259 PGM_LOCK_VOID(pVM);
1260
1261#ifdef VBOX_WITH_STATISTICS
1262 /*
1263 * Collect stats on the access.
1264 */
1265 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1266 if (cbBuf <= 16 && cbBuf > 0)
1267 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1268 else if (cbBuf >= 17 && cbBuf < 32)
1269 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1270 else if (cbBuf >= 32 && cbBuf < 64)
1271 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1272 else if (cbBuf >= 64)
1273 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1274
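    /* Expected entry alignment: 4 bytes for the legacy 32-bit page table/directory kinds, 8 bytes for everything else. */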
1275 uint8_t cbAlign;
1276 switch (pPage->enmKind)
1277 {
1278 default:
1279 cbAlign = 7;
1280 break;
1281 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1282 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1283 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1284 case PGMPOOLKIND_32BIT_PD:
1285 case PGMPOOLKIND_32BIT_PD_PHYS:
1286 cbAlign = 3;
1287 break;
1288 }
1289 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1290 if ((uint8_t)GCPhys & cbAlign)
1291 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1292#endif
1293
1294 /*
1295 * Make sure the pool page wasn't modified by a different CPU.
1296 */
1297 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1298 {
1299 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1300
1301 /* The max modification count before flushing depends on the context and page type. */
1302#ifdef IN_RING3
1303 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1304#else
1305 uint16_t cMaxModifications;
1306 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1307 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1308 cMaxModifications = 4;
1309 else
1310 cMaxModifications = 24;
1311#endif
1312
1313 /*
1314 * We don't have to be very sophisticated about this since there are relatively few calls here.
1315 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1316 */
1317 if ( ( pPage->cModifications < cMaxModifications
1318 || pgmPoolIsPageLocked(pPage) )
1319 && enmOrigin != PGMACCESSORIGIN_DEVICE
1320 && cbBuf <= 16)
1321 {
1322 /* Clear the shadow entry. */
1323 if (!pPage->cModifications++)
1324 pgmPoolMonitorModifiedInsert(pPool, pPage);
1325
1326 if (cbBuf <= 8)
1327 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1328 else
1329 {
1330 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1331 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1332 }
1333 }
1334 else
1335 pgmPoolMonitorChainFlush(pPool, pPage);
1336
1337 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1338 }
1339 else
1340 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1341 PGM_UNLOCK(pVM);
1342 return VINF_PGM_HANDLER_DO_DEFAULT;
1343}
1344
1345
1346#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1347
1348# if defined(VBOX_STRICT) && !defined(IN_RING3)
1349
1350/**
1351 * Check references to guest physical memory in a PAE / PAE page table.
1352 *
1353 * @param pPool The pool.
1354 * @param pPage The page.
1355 * @param pShwPT The shadow page table (mapping of the page).
1356 * @param pGstPT The guest page table.
1357 */
1358static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1359{
1360 unsigned cErrors = 0;
1361 int LastRc = -1; /* initialized to shut up gcc */
1362 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1363 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1364 PVMCC pVM = pPool->CTX_SUFF(pVM);
1365
1366# ifdef VBOX_STRICT
1367 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1368 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1369# endif
1370 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1371 {
1372 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1373 {
1374 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1375 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1376 if ( rc != VINF_SUCCESS
1377 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1378 {
1379 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1380 LastPTE = i;
1381 LastRc = rc;
1382 LastHCPhys = HCPhys;
1383 cErrors++;
1384
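                /* On a mismatch, scan the whole pool for other PAE shadow page tables that still map this page table read/write and log them. */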
1385 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1386 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1387 AssertRC(rc);
1388
1389 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1390 {
1391 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1392
1393 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1394 {
1395 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1396
1397 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1398 {
1399 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1400 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1401 {
1402 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1403 }
1404 }
1405
1406 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1407 }
1408 }
1409 }
1410 }
1411 }
1412 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1413}
1414
1415
1416/**
1417 * Check references to guest physical memory in a PAE / 32-bit page table.
1418 *
1419 * @param pPool The pool.
1420 * @param pPage The page.
1421 * @param pShwPT The shadow page table (mapping of the page).
1422 * @param pGstPT The guest page table.
1423 */
1424static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1425{
1426 unsigned cErrors = 0;
1427 int LastRc = -1; /* initialized to shut up gcc */
1428 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1429 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1430 PVMCC pVM = pPool->CTX_SUFF(pVM);
1431
1432# ifdef VBOX_STRICT
1433 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1434 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1435# endif
1436 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1437 {
1438 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1439 {
1440 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1441 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1442 if ( rc != VINF_SUCCESS
1443 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1444 {
1445 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1446 LastPTE = i;
1447 LastRc = rc;
1448 LastHCPhys = HCPhys;
1449 cErrors++;
1450
1451 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1452 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1453 AssertRC(rc);
1454
1455 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1456 {
1457 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1458
1459 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1460 {
1461 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1462
1463 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1464 {
1465 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1466 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1467 {
1468 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1469 }
1470 }
1471
1472 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1473 }
1474 }
1475 }
1476 }
1477 }
1478 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1479}
1480
1481# endif /* VBOX_STRICT && !IN_RING3 */
1482
1483/**
1484 * Clear references to guest physical memory in a PAE / PAE page table.
1485 *
1486 * @returns nr of changed PTEs
1487 * @param pPool The pool.
1488 * @param pPage The page.
1489 * @param pShwPT The shadow page table (mapping of the page).
1490 * @param pGstPT The guest page table.
1491 * @param pOldGstPT The old cached guest page table.
1492 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1493 * @param pfFlush Flush reused page table (out)
1494 */
1495DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1496 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1497{
1498 unsigned cChanged = 0;
1499
1500# ifdef VBOX_STRICT
1501 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1502 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1503# endif
1504 *pfFlush = false;
1505
1506 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1507 {
1508 /* Check the new value written by the guest. If present and with a bogus physical address, then
1509 * it's fairly safe to assume the guest is reusing the PT.
1510 */
1511 if ( fAllowRemoval
1512 && (pGstPT->a[i].u & X86_PTE_P))
1513 {
1514 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1515 {
1516 *pfFlush = true;
1517 return ++cChanged;
1518 }
1519 }
1520 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1521 {
1522 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1523 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1524 {
1525# ifdef VBOX_STRICT
1526 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1527 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1528 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1529# endif
1530 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1531 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1532 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1533 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1534
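                    /* Note: fHostRW <= fGuestRW tolerates a shadow PTE that is read-only while the
                       guest PTE is writable, since write monitoring / dirty-bit tracking may have
                       stripped the RW bit from the shadow copy. */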
1535 if ( uHostAttr == uGuestAttr
1536 && fHostRW <= fGuestRW)
1537 continue;
1538 }
1539 cChanged++;
1540 /* Something was changed, so flush it. */
1541 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1542 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1543 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1544 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1545 }
1546 }
1547 return cChanged;
1548}
1549
1550
1551/**
1552 * Clear references to guest physical memory in a PAE / 32-bit page table.
1553 *
1554 * @returns nr of changed PTEs
1555 * @param pPool The pool.
1556 * @param pPage The page.
1557 * @param pShwPT The shadow page table (mapping of the page).
1558 * @param pGstPT The guest page table.
1559 * @param pOldGstPT The old cached guest page table.
1560 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1561 * @param pfFlush Flush reused page table (out)
1562 */
1563DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1564 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1565{
1566 unsigned cChanged = 0;
1567
1568# ifdef VBOX_STRICT
1569 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1570 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1571# endif
1572 *pfFlush = false;
1573
1574 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1575 {
1576 /* Check the new value written by the guest. If present and with a bogus physical address, then
1577 * it's fairly safe to assume the guest is reusing the PT. */
1578 if (fAllowRemoval)
1579 {
1580 X86PGUINT const uPte = pGstPT->a[i].u;
1581 if ( (uPte & X86_PTE_P)
1582 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1583 {
1584 *pfFlush = true;
1585 return ++cChanged;
1586 }
1587 }
1588 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1589 {
1590 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1591 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1592 {
1593# ifdef VBOX_STRICT
1594 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1595 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1596 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1597# endif
1598 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1599 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1600 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1601 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1602
1603 if ( uHostAttr == uGuestAttr
1604 && fHostRW <= fGuestRW)
1605 continue;
1606 }
1607 cChanged++;
1608 /* Something was changed, so flush it. */
1609 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1610 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1611 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1612 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1613 }
1614 }
1615 return cChanged;
1616}
1617
1618
1619/**
1620 * Flush a dirty page
1621 *
1622 * @param pVM The cross context VM structure.
1623 * @param pPool The pool.
1624 * @param idxSlot Dirty array slot index
1625 * @param fAllowRemoval Allow a reused page table to be removed
1626 */
1627static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1628{
1629 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1630
1631 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1632 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1633 if (idxPage == NIL_PGMPOOL_IDX)
1634 return;
1635
1636 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1637 Assert(pPage->idx == idxPage);
1638 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1639
1640 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1641 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1642
1643 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1644 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1645 Assert(rc == VINF_SUCCESS);
1646 pPage->fDirty = false;
1647
1648# ifdef VBOX_STRICT
1649 uint64_t fFlags = 0;
1650 RTHCPHYS HCPhys;
1651 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1652 AssertMsg( ( rc == VINF_SUCCESS
1653 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1654 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1655 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1656 || rc == VERR_PAGE_NOT_PRESENT,
1657 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1658# endif
1659
1660 /* Flush those PTEs that have changed. */
1661 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1662 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1663 void *pvGst;
1664 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1665 bool fFlush;
1666 unsigned cChanges;
1667
1668 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1669 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1670 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1671 else
1672 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1673 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1674
1675 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1676 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1677 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1678 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1679
1680 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1681 Assert(pPage->cModifications);
1682 if (cChanges < 4)
1683 pPage->cModifications = 1; /* must use > 0 here */
1684 else
1685 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1686
1687 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1688 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1689 pPool->idxFreeDirtyPage = idxSlot;
1690
1691 pPool->cDirtyPages--;
1692 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1693 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1694 if (fFlush)
1695 {
1696 Assert(fAllowRemoval);
1697 Log(("Flush reused page table!\n"));
1698 pgmPoolFlushPage(pPool, pPage);
1699 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1700 }
1701 else
1702 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1703}
1704
1705
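/*
 * Overview of the dirty page optimization (PGMPOOL_WITH_OPTIMIZED_DIRTY_PT): a write-monitored
 * guest page table that keeps being modified can be parked in aDirtyPages[] together with a
 * snapshot of its guest PT (pgmPoolAddDirtyPage below), so that further guest writes need not
 * be intercepted individually.  pgmPoolFlushDirtyPage above later write-protects the page
 * again and re-syncs only the shadow PTEs that actually differ from the snapshot, or flushes
 * the whole shadow PT if the guest appears to have reused the page for something else.
 */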
1706# ifndef IN_RING3
1707/**
1708 * Add a new dirty page
1709 *
1710 * @param pVM The cross context VM structure.
1711 * @param pPool The pool.
1712 * @param pPage The page.
1713 */
1714void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1715{
1716 PGM_LOCK_ASSERT_OWNER(pVM);
1717 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1718 Assert(!pPage->fDirty);
1719
1720 unsigned idxFree = pPool->idxFreeDirtyPage;
1721 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1722 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1723
1724 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1725 {
1726 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1727 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1728 }
1729 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1730 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1731
1732 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1733
1734 /*
1735 * Make a copy of the guest page table as we require valid GCPhys addresses
1736 * when removing references to physical pages.
1737 * (The HCPhys linear lookup is *extremely* expensive!)
1738 */
1739 void *pvGst;
1740 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1741 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1742 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1743# ifdef VBOX_STRICT
1744 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1745 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1746 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1747 else
1748 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1749 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1750# endif
1751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1752
1753 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1754 pPage->fDirty = true;
1755 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1756 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1757 pPool->cDirtyPages++;
1758
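    /* Advance the free-slot hint; the dirty array size is a power of two (8 or 16, see the
       AssertCompile above), so the index wraps with a simple mask.  If that slot is still in
       use, the loop below scans for any free slot. */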
1759 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1760 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1761 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1762 {
1763 unsigned i;
1764 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1765 {
1766 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1767 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1768 {
1769 pPool->idxFreeDirtyPage = idxFree;
1770 break;
1771 }
1772 }
1773 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1774 }
1775
1776 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1777
1778 /*
1779 * Clear all references to this shadow table. See @bugref{7298}.
1780 */
1781 pgmPoolTrackClearPageUsers(pPool, pPage);
1782}
1783# endif /* !IN_RING3 */
1784
1785
1786/**
1787 * Check if the specified page is dirty (not write monitored)
1788 *
1789 * @returns true if the page is dirty (not write monitored), false otherwise.
1790 * @param pVM The cross context VM structure.
1791 * @param GCPhys Guest physical address
1792 */
1793bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1794{
1795 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1796 PGM_LOCK_ASSERT_OWNER(pVM);
1797 if (!pPool->cDirtyPages)
1798 return false;
1799
1800 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1801
1802 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1803 {
1804 unsigned idxPage = pPool->aidxDirtyPages[i];
1805 if (idxPage != NIL_PGMPOOL_IDX)
1806 {
1807 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1808 if (pPage->GCPhys == GCPhys)
1809 return true;
1810 }
1811 }
1812 return false;
1813}
1814
1815
1816/**
1817 * Reset all dirty pages by reinstating page monitoring.
1818 *
1819 * @param pVM The cross context VM structure.
1820 */
1821void pgmPoolResetDirtyPages(PVMCC pVM)
1822{
1823 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1824 PGM_LOCK_ASSERT_OWNER(pVM);
1825 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1826
1827 if (!pPool->cDirtyPages)
1828 return;
1829
1830 Log(("pgmPoolResetDirtyPages\n"));
1831 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1832 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1833
1834 pPool->idxFreeDirtyPage = 0;
1835 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1836 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1837 {
1838 unsigned i;
1839 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1840 {
1841 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1842 {
1843 pPool->idxFreeDirtyPage = i;
1844 break;
1845 }
1846 }
1847 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1848 }
1849
1850 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1851 return;
1852}
1853
1854
1855/**
1856 * Invalidate the PT entry for the specified page
1857 *
1858 * @param pVM The cross context VM structure.
1859 * @param GCPtrPage Guest page to invalidate
1860 */
1861void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1862{
1863 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1864 PGM_LOCK_ASSERT_OWNER(pVM);
1865 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1866
1867 if (!pPool->cDirtyPages)
1868 return;
1869
1870 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1871 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1872 {
1873 /** @todo What was intended here??? This looks incomplete... */
1874 }
1875}
1876
1877
1878/**
1879 * Flush the dirty page matching the given guest page table address, reinstating its write monitoring.
1880 *
1881 * @param pVM The cross context VM structure.
1882 * @param GCPhysPT Physical address of the page table
1883 */
1884void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1885{
1886 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1887 PGM_LOCK_ASSERT_OWNER(pVM);
1888 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1889 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1890
1891 if (!pPool->cDirtyPages)
1892 return;
1893
1894 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1895
1896 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1897 {
1898 unsigned idxPage = pPool->aidxDirtyPages[i];
1899 if (idxPage != NIL_PGMPOOL_IDX)
1900 {
1901 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1902 if (pPage->GCPhys == GCPhysPT)
1903 {
1904 idxDirtyPage = i;
1905 break;
1906 }
1907 }
1908 }
1909
1910 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1911 {
1912 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1913 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1914 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1915 {
1916 unsigned i;
1917 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1918 {
1919 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1920 {
1921 pPool->idxFreeDirtyPage = i;
1922 break;
1923 }
1924 }
1925 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1926 }
1927 }
1928}
1929
1930#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1931
1932/**
1933 * Inserts a page into the GCPhys hash table.
1934 *
1935 * @param pPool The pool.
1936 * @param pPage The page.
1937 */
1938DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1939{
1940 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1941 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1942 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1943 pPage->iNext = pPool->aiHash[iHash];
1944 pPool->aiHash[iHash] = pPage->idx;
1945}
1946
1947
1948/**
1949 * Removes a page from the GCPhys hash table.
1950 *
1951 * @param pPool The pool.
1952 * @param pPage The page.
1953 */
1954DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1955{
1956 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1957 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1958 if (pPool->aiHash[iHash] == pPage->idx)
1959 pPool->aiHash[iHash] = pPage->iNext;
1960 else
1961 {
1962 uint16_t iPrev = pPool->aiHash[iHash];
1963 for (;;)
1964 {
1965 const int16_t i = pPool->aPages[iPrev].iNext;
1966 if (i == pPage->idx)
1967 {
1968 pPool->aPages[iPrev].iNext = pPage->iNext;
1969 break;
1970 }
1971 if (i == NIL_PGMPOOL_IDX)
1972 {
1973 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1974 break;
1975 }
1976 iPrev = i;
1977 }
1978 }
1979 pPage->iNext = NIL_PGMPOOL_IDX;
1980}
1981
1982
1983/**
1984 * Frees up one cache page.
1985 *
1986 * @returns VBox status code.
1987 * @retval VINF_SUCCESS on success.
1988 * @param pPool The pool.
1989 * @param iUser The user index.
1990 */
1991static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1992{
1993 const PVMCC pVM = pPool->CTX_SUFF(pVM);
1994 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1995 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1996
1997 /*
1998 * Select one page from the tail of the age list.
1999 */
2000 PPGMPOOLPAGE pPage;
2001 for (unsigned iLoop = 0; ; iLoop++)
2002 {
2003 uint16_t iToFree = pPool->iAgeTail;
2004 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2005 iToFree = pPool->aPages[iToFree].iAgePrev;
2006/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2007 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2008 {
2009 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2010 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2011 {
2012 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2013 continue;
2014 iToFree = i;
2015 break;
2016 }
2017 }
2018*/
2019 Assert(iToFree != iUser);
2020 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2021 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2022 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2023 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2024 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2025 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2026
2027 pPage = &pPool->aPages[iToFree];
2028
2029 /*
2030 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2031 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2032 */
2033 if ( !pgmPoolIsPageLocked(pPage)
2034 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2035 break;
2036 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2037 pgmPoolCacheUsed(pPool, pPage);
2038 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2039 }
2040
2041 /*
2042 * Found a usable page, flush it and return.
2043 */
2044 int rc = pgmPoolFlushPage(pPool, pPage);
2045 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2046 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2047 if (rc == VINF_SUCCESS)
2048 PGM_INVL_ALL_VCPU_TLBS(pVM);
2049 return rc;
2050}
2051
2052
2053/**
2054 * Checks if a kind mismatch is really a page being reused
2055 * or if it's just normal remappings.
2056 *
2057 * @returns true if reused and the cached page (enmKind1) should be flushed
2058 * @returns false if not reused.
2059 * @param enmKind1 The kind of the cached page.
2060 * @param enmKind2 The kind of the requested page.
2061 */
2062static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2063{
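    /* Example: a guest page cached as PGMPOOLKIND_32BIT_PD but now requested as
       PGMPOOLKIND_PAE_PT_FOR_PAE_PT has been repurposed by the guest (e.g. after a paging
       mode switch) and the cached copy must be flushed (true), whereas requesting it as
       PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT is a normal PD-as-PT remapping (false). */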
2064 switch (enmKind1)
2065 {
2066 /*
2067 * Never reuse them. There is no remapping in non-paging mode.
2068 */
2069 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2070 case PGMPOOLKIND_32BIT_PD_PHYS:
2071 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2072 case PGMPOOLKIND_PAE_PD_PHYS:
2073 case PGMPOOLKIND_PAE_PDPT_PHYS:
2074 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2075 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2076 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2077 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2078 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2079 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2080 return false;
2081
2082 /*
2083 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2084 */
2085 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2086 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2087 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2088 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2089 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2090 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2092 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2093 case PGMPOOLKIND_32BIT_PD:
2094 case PGMPOOLKIND_PAE_PDPT:
2095 switch (enmKind2)
2096 {
2097 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2098 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2099 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2100 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2101 case PGMPOOLKIND_64BIT_PML4:
2102 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2103 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2104 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2105 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2106 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2107 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2108 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2109 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2110 return true;
2111 default:
2112 return false;
2113 }
2114
2115 /*
2116 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2117 */
2118 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2119 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2120 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2121 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2122 case PGMPOOLKIND_64BIT_PML4:
2123 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2124 switch (enmKind2)
2125 {
2126 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2127 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2128 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2129 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2130 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2131 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2132 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2133 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2134 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2135 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2136 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2137 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2138 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2139 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2140 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2141 return true;
2142 default:
2143 return false;
2144 }
2145
2146 /*
2147 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2148 */
2149 case PGMPOOLKIND_ROOT_NESTED:
2150 return false;
2151
2152 default:
2153 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2154 }
2155}
2156
2157
2158/**
2159 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2160 *
2161 * @returns VBox status code.
2162 * @retval VINF_PGM_CACHED_PAGE on success.
2163 * @retval VERR_FILE_NOT_FOUND if not found.
2164 * @param pPool The pool.
2165 * @param GCPhys The GC physical address of the page we're gonna shadow.
2166 * @param enmKind The kind of mapping.
2167 * @param enmAccess Access type for the mapping (only relevant for big pages)
2168 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2169 * @param iUser The shadow page pool index of the user table. This is
2170 * NIL_PGMPOOL_IDX for root pages.
2171 * @param iUserTable The index into the user table (shadowed). Ignored if
2172 * root page
2173 * @param ppPage Where to store the pointer to the page.
2174 */
2175static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2176 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2177{
2178 /*
2179 * Look up the GCPhys in the hash.
2180 */
2181 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2182 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2183 if (i != NIL_PGMPOOL_IDX)
2184 {
2185 do
2186 {
2187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2188 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2189 if (pPage->GCPhys == GCPhys)
2190 {
2191 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2192 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2193 && pPage->fA20Enabled == fA20Enabled)
2194 {
2195 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2196 * doesn't flush it in case there are no more free use records.
2197 */
2198 pgmPoolCacheUsed(pPool, pPage);
2199
2200 int rc = VINF_SUCCESS;
2201 if (iUser != NIL_PGMPOOL_IDX)
2202 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2203 if (RT_SUCCESS(rc))
2204 {
2205 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2206 *ppPage = pPage;
2207 if (pPage->cModifications)
2208 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2209 STAM_COUNTER_INC(&pPool->StatCacheHits);
2210 return VINF_PGM_CACHED_PAGE;
2211 }
2212 return rc;
2213 }
2214
2215 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2216 {
2217 /*
2218 * The kind is different. In some cases we should now flush the page
2219 * as it has been reused, but in most cases this is normal remapping
2220 * of PDs as PT or big pages using the GCPhys field in a slightly
2221 * different way than the other kinds.
2222 */
2223 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2224 {
2225 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2226 pgmPoolFlushPage(pPool, pPage);
2227 break;
2228 }
2229 }
2230 }
2231
2232 /* next */
2233 i = pPage->iNext;
2234 } while (i != NIL_PGMPOOL_IDX);
2235 }
2236
2237 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2238 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2239 return VERR_FILE_NOT_FOUND;
2240}
2241
2242
2243/**
2244 * Inserts a page into the cache.
2245 *
2246 * @param pPool The pool.
2247 * @param pPage The cached page.
2248 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2249 */
2250static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2251{
2252 /*
2253 * Insert into the GCPhys hash if the page is fit for that.
2254 */
2255 Assert(!pPage->fCached);
2256 if (fCanBeCached)
2257 {
2258 pPage->fCached = true;
2259 pgmPoolHashInsert(pPool, pPage);
2260 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2261 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2262 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2263 }
2264 else
2265 {
2266 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2267 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2268 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2269 }
2270
2271 /*
2272 * Insert at the head of the age list.
2273 */
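    /* Head of the age list = most recently used; pgmPoolCacheFreeOne evicts from iAgeTail. */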
2274 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2275 pPage->iAgeNext = pPool->iAgeHead;
2276 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2277 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2278 else
2279 pPool->iAgeTail = pPage->idx;
2280 pPool->iAgeHead = pPage->idx;
2281}
2282
2283
2284/**
2285 * Flushes a cached page.
2286 *
2287 * @param pPool The pool.
2288 * @param pPage The cached page.
2289 */
2290static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2291{
2292 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2293
2294 /*
2295 * Remove the page from the hash.
2296 */
2297 if (pPage->fCached)
2298 {
2299 pPage->fCached = false;
2300 pgmPoolHashRemove(pPool, pPage);
2301 }
2302 else
2303 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2304
2305 /*
2306 * Remove it from the age list.
2307 */
2308 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2309 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2310 else
2311 pPool->iAgeTail = pPage->iAgePrev;
2312 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2313 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2314 else
2315 pPool->iAgeHead = pPage->iAgeNext;
2316 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2317 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2318}
2319
2320
2321/**
2322 * Looks for pages sharing the monitor.
2323 *
2324 * @returns Pointer to the head page.
2325 * @returns NULL if not found.
2326 * @param pPool The Pool
2327 * @param pNewPage The page which is going to be monitored.
2328 */
2329static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2330{
2331 /*
2332 * Look up the GCPhys in the hash.
2333 */
2334 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2335 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2336 if (i == NIL_PGMPOOL_IDX)
2337 return NULL;
2338 do
2339 {
2340 PPGMPOOLPAGE pPage = &pPool->aPages[i];
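        /* Unsigned compare: matches any pool page whose GCPhys lies in the same guest page as
           pNewPage, including page kinds that keep a sub-page offset in GCPhys. */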
2341 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2342 && pPage != pNewPage)
2343 {
2344 switch (pPage->enmKind)
2345 {
2346 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2347 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2348 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2349 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2350 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2351 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2352 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2353 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2354 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2355 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2356 case PGMPOOLKIND_64BIT_PML4:
2357 case PGMPOOLKIND_32BIT_PD:
2358 case PGMPOOLKIND_PAE_PDPT:
2359 {
2360 /* find the head */
2361 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2362 {
2363 Assert(pPage->iMonitoredPrev != pPage->idx);
2364 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2365 }
2366 return pPage;
2367 }
2368
2369 /* ignore, no monitoring. */
2370 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2371 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2372 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2373 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2374 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2375 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2376 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2377 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2378 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2379 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2380 case PGMPOOLKIND_ROOT_NESTED:
2381 case PGMPOOLKIND_PAE_PD_PHYS:
2382 case PGMPOOLKIND_PAE_PDPT_PHYS:
2383 case PGMPOOLKIND_32BIT_PD_PHYS:
2384 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2385 break;
2386 default:
2387 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2388 }
2389 }
2390
2391 /* next */
2392 i = pPage->iNext;
2393 } while (i != NIL_PGMPOOL_IDX);
2394 return NULL;
2395}
2396
2397
2398/**
2399 * Enables write monitoring of a guest page.
2400 *
2401 * @returns VBox status code.
2402 * @retval VINF_SUCCESS on success.
2403 * @param pPool The pool.
2404 * @param pPage The cached page.
2405 */
2406static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2407{
2408 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2409
2410 /*
2411 * Filter out the relevant kinds.
2412 */
2413 switch (pPage->enmKind)
2414 {
2415 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2416 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2417 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2418 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2419 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2420 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2421 case PGMPOOLKIND_64BIT_PML4:
2422 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2423 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2425 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2426 case PGMPOOLKIND_32BIT_PD:
2427 case PGMPOOLKIND_PAE_PDPT:
2428 break;
2429
2430 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2431 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2432 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2433 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2434 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2435 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2436 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2437 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2438 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2439 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2440 case PGMPOOLKIND_ROOT_NESTED:
2441 /* Nothing to monitor here. */
2442 return VINF_SUCCESS;
2443
2444 case PGMPOOLKIND_32BIT_PD_PHYS:
2445 case PGMPOOLKIND_PAE_PDPT_PHYS:
2446 case PGMPOOLKIND_PAE_PD_PHYS:
2447 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2448 /* Nothing to monitor here. */
2449 return VINF_SUCCESS;
2450 default:
2451 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2452 }
2453
2454 /*
2455 * Install handler.
2456 */
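    /* All pool pages shadowing the same guest page share a single physical access handler:
       the chain head owns the registration (its pool index is the handler user argument) and
       the others are linked via iMonitoredNext/iMonitoredPrev; see also the
       PGMHandlerPhysicalChangeUserArg call in pgmPoolMonitorFlush when the head changes. */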
2457 int rc;
2458 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2459 if (pPageHead)
2460 {
2461 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2462 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2463
2464#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2465 if (pPageHead->fDirty)
2466 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2467#endif
2468
2469 pPage->iMonitoredPrev = pPageHead->idx;
2470 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2471 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2472 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2473 pPageHead->iMonitoredNext = pPage->idx;
2474 rc = VINF_SUCCESS;
2475 }
2476 else
2477 {
2478 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2479 PVMCC pVM = pPool->CTX_SUFF(pVM);
2480 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2481 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2482 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2483 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2484 * the heap size should suffice. */
2485 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2486 PVMCPU pVCpu = VMMGetCpu(pVM);
2487 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2488 }
2489 pPage->fMonitored = true;
2490 return rc;
2491}
2492
2493
2494/**
2495 * Disables write monitoring of a guest page.
2496 *
2497 * @returns VBox status code.
2498 * @retval VINF_SUCCESS on success.
2499 * @param pPool The pool.
2500 * @param pPage The cached page.
2501 */
2502static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2503{
2504 /*
2505 * Filter out the relevant kinds.
2506 */
2507 switch (pPage->enmKind)
2508 {
2509 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2510 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2511 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2512 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2513 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2514 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2515 case PGMPOOLKIND_64BIT_PML4:
2516 case PGMPOOLKIND_32BIT_PD:
2517 case PGMPOOLKIND_PAE_PDPT:
2518 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2519 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2520 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2521 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2522 break;
2523
2524 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2525 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2526 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2527 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2528 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2529 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2530 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2531 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2532 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2533 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2534 case PGMPOOLKIND_ROOT_NESTED:
2535 case PGMPOOLKIND_PAE_PD_PHYS:
2536 case PGMPOOLKIND_PAE_PDPT_PHYS:
2537 case PGMPOOLKIND_32BIT_PD_PHYS:
2538 /* Nothing to monitor here. */
2539 Assert(!pPage->fMonitored);
2540 return VINF_SUCCESS;
2541
2542 default:
2543 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2544 }
2545 Assert(pPage->fMonitored);
2546
2547 /*
2548 * Remove the page from the monitored list or uninstall it if last.
2549 */
2550 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2551 int rc;
2552 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2553 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2554 {
2555 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2556 {
2557 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2558 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2559 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2560
2561 AssertFatalRCSuccess(rc);
2562 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2563 }
2564 else
2565 {
2566 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2567 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2568 {
2569 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2570 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2571 }
2572 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2573 rc = VINF_SUCCESS;
2574 }
2575 }
2576 else
2577 {
2578 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2579 AssertFatalRC(rc);
2580 PVMCPU pVCpu = VMMGetCpu(pVM);
2581 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2582 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2583 }
2584 pPage->fMonitored = false;
2585
2586 /*
2587 * Remove it from the list of modified pages (if in it).
2588 */
2589 pgmPoolMonitorModifiedRemove(pPool, pPage);
2590
2591 return rc;
2592}
2593
2594
2595/**
2596 * Inserts the page into the list of modified pages.
2597 *
2598 * @param pPool The pool.
2599 * @param pPage The page.
2600 */
2601void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2602{
2603 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2604 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2605 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2606 && pPool->iModifiedHead != pPage->idx,
2607 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2608 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2609 pPool->iModifiedHead, pPool->cModifiedPages));
2610
2611 pPage->iModifiedNext = pPool->iModifiedHead;
2612 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2613 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2614 pPool->iModifiedHead = pPage->idx;
2615 pPool->cModifiedPages++;
2616#ifdef VBOX_WITH_STATISTICS
2617 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2618 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2619#endif
2620}
2621
2622
2623/**
2624 * Removes the page from the list of modified pages and resets the
2625 * modification counter.
2626 *
2627 * @param pPool The pool.
2628 * @param pPage The page which is believed to be in the list of modified pages.
2629 */
2630static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2631{
2632 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2633 if (pPool->iModifiedHead == pPage->idx)
2634 {
2635 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2636 pPool->iModifiedHead = pPage->iModifiedNext;
2637 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2638 {
2639 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2640 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2641 }
2642 pPool->cModifiedPages--;
2643 }
2644 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2645 {
2646 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2647 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2648 {
2649 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2650 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2651 }
2652 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2653 pPool->cModifiedPages--;
2654 }
2655 else
2656 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2657 pPage->cModifications = 0;
2658}
2659
2660
2661/**
2662 * Zaps the list of modified pages, resetting their modification counters in the process.
2663 *
2664 * @param pVM The cross context VM structure.
2665 */
2666static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2667{
2668 PGM_LOCK_VOID(pVM);
2669 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2670 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2671
2672 unsigned cPages = 0; NOREF(cPages);
2673
2674#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2675 pgmPoolResetDirtyPages(pVM);
2676#endif
2677
2678 uint16_t idx = pPool->iModifiedHead;
2679 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2680 while (idx != NIL_PGMPOOL_IDX)
2681 {
2682 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2683 idx = pPage->iModifiedNext;
2684 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2685 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2686 pPage->cModifications = 0;
2687 Assert(++cPages);
2688 }
2689 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2690 pPool->cModifiedPages = 0;
2691 PGM_UNLOCK(pVM);
2692}
2693
2694
2695/**
2696 * Handle SyncCR3 pool tasks
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS if successfully added.
2700 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2701 * @param pVCpu The cross context virtual CPU structure.
2702 * @remark Should only be used when monitoring is available, thus placed in
2703 * the PGMPOOL_WITH_MONITORING \#ifdef.
2704 */
2705int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2706{
2707 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2708 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2709
2710 /*
2711 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2712 * Occasionally we will have to clear all the shadow page tables because we wanted
2713 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2714 * sometimes referred to as a 'lightweight flush'.
2715 */
2716# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2717 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2718 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2719# else /* !IN_RING3 */
2720 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2721 {
2722 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2723 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2724
2725 /* Make sure all other VCPUs return to ring 3. */
2726 if (pVM->cCpus > 1)
2727 {
2728 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2729 PGM_INVL_ALL_VCPU_TLBS(pVM);
2730 }
2731 return VINF_PGM_SYNC_CR3;
2732 }
2733# endif /* !IN_RING3 */
2734 else
2735 {
2736 pgmPoolMonitorModifiedClearAll(pVM);
2737
2738 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2739 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2740 {
2741 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2742 return pgmPoolSyncCR3(pVCpu);
2743 }
2744 }
2745 return VINF_SUCCESS;
2746}
2747
2748
2749/**
2750 * Frees up at least one user entry.
2751 *
2752 * @returns VBox status code.
2753 * @retval VINF_SUCCESS if at least one user entry was freed.
2754 *
2755 * @param pPool The pool.
2756 * @param iUser The user index.
2757 */
2758static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2759{
2760 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2761 /*
2762 * Just free cached pages in a braindead fashion.
2763 */
2764 /** @todo walk the age list backwards and free the first with usage. */
2765 int rc = VINF_SUCCESS;
2766 do
2767 {
2768 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2769 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2770 rc = rc2;
2771 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2772 return rc;
2773}
2774
2775
2776/**
2777 * Inserts a page into the cache.
2778 *
2779 * This will create a user node for the page, insert it into the GCPhys
2780 * hash, and insert it into the age list.
2781 *
2782 * @returns VBox status code.
2783 * @retval VINF_SUCCESS if successfully added.
2784 *
2785 * @param pPool The pool.
2786 * @param pPage The cached page.
2787 * @param GCPhys The GC physical address of the page we're gonna shadow.
2788 * @param iUser The user index.
2789 * @param iUserTable The user table index.
2790 */
2791DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2792{
2793 int rc = VINF_SUCCESS;
2794 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2795
2796 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2797
2798 if (iUser != NIL_PGMPOOL_IDX)
2799 {
2800#ifdef VBOX_STRICT
2801 /*
2802 * Check that the entry doesn't already exist.
2803 */
2804 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2805 {
2806 uint16_t i = pPage->iUserHead;
2807 do
2808 {
2809 Assert(i < pPool->cMaxUsers);
2810 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2811 i = paUsers[i].iNext;
2812 } while (i != NIL_PGMPOOL_USER_INDEX);
2813 }
2814#endif
2815
2816 /*
2817 * Find a free user node.
2818 */
2819 uint16_t i = pPool->iUserFreeHead;
2820 if (i == NIL_PGMPOOL_USER_INDEX)
2821 {
2822 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2823 if (RT_FAILURE(rc))
2824 return rc;
2825 i = pPool->iUserFreeHead;
2826 }
2827
2828 /*
2829 * Unlink the user node from the free list,
2830 * initialize and insert it into the user list.
2831 */
2832 pPool->iUserFreeHead = paUsers[i].iNext;
2833 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2834 paUsers[i].iUser = iUser;
2835 paUsers[i].iUserTable = iUserTable;
2836 pPage->iUserHead = i;
2837 }
2838 else
2839 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2840
2841
2842 /*
2843 * Insert into cache and enable monitoring of the guest page if enabled.
2844 *
2845 * Until we implement caching of all levels, including the CR3 one, we'll
2846 * have to make sure we don't try to monitor & cache any recursive reuse of
2847 * a monitored CR3 page. Because all Windows versions do this, we'll
2848 * have to be able to do combined access monitoring, CR3 + PT and
2849 * PD + PT (guest PAE).
2850 *
2851 * Update:
2852 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2853 */
2854 const bool fCanBeMonitored = true;
2855 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2856 if (fCanBeMonitored)
2857 {
2858 rc = pgmPoolMonitorInsert(pPool, pPage);
2859 AssertRC(rc);
2860 }
2861 return rc;
2862}
2863
2864
2865/**
2866 * Adds a user reference to a page.
2867 *
2868 * This will move the page to the head of the age list.
2869 *
2870 * @returns VBox status code.
2871 * @retval VINF_SUCCESS if successfully added.
2872 *
2873 * @param pPool The pool.
2874 * @param pPage The cached page.
2875 * @param iUser The user index.
2876 * @param iUserTable The user table.
2877 */
2878static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2879{
2880 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2881 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2882 Assert(iUser != NIL_PGMPOOL_IDX);
2883
2884# ifdef VBOX_STRICT
2885 /*
2886 * Check that the entry doesn't already exist. We only allow multiple
2887 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2888 */
2889 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2890 {
2891 uint16_t i = pPage->iUserHead;
2892 do
2893 {
2894 Assert(i < pPool->cMaxUsers);
2895 /** @todo this assertion looks odd... Shouldn't it be && here? */
2896 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2897 i = paUsers[i].iNext;
2898 } while (i != NIL_PGMPOOL_USER_INDEX);
2899 }
2900# endif
2901
2902 /*
2903 * Allocate a user node.
2904 */
2905 uint16_t i = pPool->iUserFreeHead;
2906 if (i == NIL_PGMPOOL_USER_INDEX)
2907 {
2908 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2909 if (RT_FAILURE(rc))
2910 return rc;
2911 i = pPool->iUserFreeHead;
2912 }
2913 pPool->iUserFreeHead = paUsers[i].iNext;
2914
2915 /*
2916 * Initialize the user node and insert it.
2917 */
2918 paUsers[i].iNext = pPage->iUserHead;
2919 paUsers[i].iUser = iUser;
2920 paUsers[i].iUserTable = iUserTable;
2921 pPage->iUserHead = i;
2922
2923# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2924 if (pPage->fDirty)
2925 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2926# endif
2927
2928 /*
2929 * Tell the cache to update its replacement stats for this page.
2930 */
2931 pgmPoolCacheUsed(pPool, pPage);
2932 return VINF_SUCCESS;
2933}
2934
2935
2936/**
2937 * Frees a user record associated with a page.
2938 *
2939 * This does not clear the entry in the user table, it simply returns the
2940 * user record to the chain of free records.
2941 *
2942 * @param pPool The pool.
2943 * @param pPage The shadow page.
2944 * @param iUser The shadow page pool index of the user table.
2945 * @param iUserTable The index into the user table (shadowed).
2946 *
2947 * @remarks Don't call this for root pages.
2948 */
2949static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2950{
2951 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2952 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2953 Assert(iUser != NIL_PGMPOOL_IDX);
2954
2955 /*
2956 * Unlink and free the specified user entry.
2957 */
2958
2959 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2960 uint16_t i = pPage->iUserHead;
2961 if ( i != NIL_PGMPOOL_USER_INDEX
2962 && paUsers[i].iUser == iUser
2963 && paUsers[i].iUserTable == iUserTable)
2964 {
2965 pPage->iUserHead = paUsers[i].iNext;
2966
2967 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2968 paUsers[i].iNext = pPool->iUserFreeHead;
2969 pPool->iUserFreeHead = i;
2970 return;
2971 }
2972
2973 /* General: Linear search. */
2974 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2975 while (i != NIL_PGMPOOL_USER_INDEX)
2976 {
2977 if ( paUsers[i].iUser == iUser
2978 && paUsers[i].iUserTable == iUserTable)
2979 {
2980 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2981 paUsers[iPrev].iNext = paUsers[i].iNext;
2982 else
2983 pPage->iUserHead = paUsers[i].iNext;
2984
2985 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2986 paUsers[i].iNext = pPool->iUserFreeHead;
2987 pPool->iUserFreeHead = i;
2988 return;
2989 }
2990 iPrev = i;
2991 i = paUsers[i].iNext;
2992 }
2993
2994 /* Fatal: didn't find it */
2995 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2996 iUser, iUserTable, pPage->GCPhys));
2997}
2998
2999
3000#if 0 /* unused */
3001/**
3002 * Gets the entry size of a shadow table.
3003 *
3004 * @param enmKind The kind of page.
3005 *
3006 * @returns The size of the entry in bytes. That is, 4 or 8.
3007 * @returns If the kind is not for a table, an assertion is raised and 0 is
3008 * returned.
3009 */
3010DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3011{
3012 switch (enmKind)
3013 {
3014 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3015 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3016 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3017 case PGMPOOLKIND_32BIT_PD:
3018 case PGMPOOLKIND_32BIT_PD_PHYS:
3019 return 4;
3020
3021 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3022 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3023 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3024 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3025 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3026 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3027 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3029 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3030 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3031 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3032 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3033 case PGMPOOLKIND_64BIT_PML4:
3034 case PGMPOOLKIND_PAE_PDPT:
3035 case PGMPOOLKIND_ROOT_NESTED:
3036 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3037 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3038 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3039 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3040 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3041 case PGMPOOLKIND_PAE_PD_PHYS:
3042 case PGMPOOLKIND_PAE_PDPT_PHYS:
3043 return 8;
3044
3045 default:
3046 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3047 }
3048}
3049#endif /* unused */
3050
3051#if 0 /* unused */
3052/**
3053 * Gets the entry size of a guest table.
3054 *
3055 * @param enmKind The kind of page.
3056 *
3057 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3058 * @returns If the kind is not for a table, an assertion is raised and 0 is
3059 * returned.
3060 */
3061DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3062{
3063 switch (enmKind)
3064 {
3065 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3066 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3067 case PGMPOOLKIND_32BIT_PD:
3068 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3069 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3070 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3071 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3072 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3073 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3074 return 4;
3075
3076 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3077 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3078 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3079 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3080 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3081 case PGMPOOLKIND_64BIT_PML4:
3082 case PGMPOOLKIND_PAE_PDPT:
3083 return 8;
3084
3085 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3086 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3087 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3088 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3090 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3091 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3092 case PGMPOOLKIND_ROOT_NESTED:
3093 case PGMPOOLKIND_PAE_PD_PHYS:
3094 case PGMPOOLKIND_PAE_PDPT_PHYS:
3095 case PGMPOOLKIND_32BIT_PD_PHYS:
3096 /** @todo can we return 0? (nobody is calling this...) */
3097 AssertFailed();
3098 return 0;
3099
3100 default:
3101 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3102 }
3103}
3104#endif /* unused */
3105
3106
3107/**
3108 * Checks one shadow page table entry for a mapping of a physical page.
3109 *
3110 * @returns true / false indicating removal of all relevant PTEs
3111 *
3112 * @param pVM The cross context VM structure.
3113 * @param pPhysPage The guest page in question.
3114 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3115 * @param iShw The shadow page table.
3116 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3117 */
3118static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3119{
3120 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3122 bool fRet = false;
3123
3124 /*
3125 * Assert sanity.
3126 */
3127 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3128 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3129 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3130
3131 /*
3132 * Then, clear the actual mappings to the page in the shadow PT.
3133 */
3134 switch (pPage->enmKind)
3135 {
3136 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3137 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3138 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3139 {
3140 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3141 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3142 uint32_t u32AndMask = 0;
3143 uint32_t u32OrMask = 0;
3144
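            /* When fFlushPTEs is false the shadow PTE is kept and only its RW bit is adjusted to
               the current handler state: writable again if no handler is active, read-only while
               write accesses are monitored. */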
3145 if (!fFlushPTEs)
3146 {
3147 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3148 {
3149 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3150 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3151 u32OrMask = X86_PTE_RW;
3152 u32AndMask = UINT32_MAX;
3153 fRet = true;
3154 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3155 break;
3156
3157 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3158 u32OrMask = 0;
3159 u32AndMask = ~X86_PTE_RW;
3160 fRet = true;
3161 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3162 break;
3163 default:
3164 /* (shouldn't be here, will assert below) */
3165 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3166 break;
3167 }
3168 }
3169 else
3170 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3171
3172 /* Update the counter if we're removing references. */
3173 if (!u32AndMask)
3174 {
3175 Assert(pPage->cPresent);
3176 Assert(pPool->cPresent);
3177 pPage->cPresent--;
3178 pPool->cPresent--;
3179 }
3180
3181 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3182 {
3183 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3184 X86PTE Pte;
3185 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3186 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3187 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3188 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3189 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3190 return fRet;
3191 }
3192#ifdef LOG_ENABLED
3193 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3194 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3195 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3196 {
3197 Log(("i=%d cFound=%d\n", i, ++cFound));
3198 }
3199#endif
3200 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3201 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3202 break;
3203 }
3204
3205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3207 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3208 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3209 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3210 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3211 {
3212 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3213 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3214 uint64_t u64OrMask = 0;
3215 uint64_t u64AndMask = 0;
3216
3217 if (!fFlushPTEs)
3218 {
3219 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3220 {
3221 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3222 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3223 u64OrMask = X86_PTE_RW;
3224 u64AndMask = UINT64_MAX;
3225 fRet = true;
3226 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3227 break;
3228
3229 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3230 u64OrMask = 0;
3231 u64AndMask = ~(uint64_t)X86_PTE_RW;
3232 fRet = true;
3233 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3234 break;
3235
3236 default:
3237 /* (shouldn't be here, will assert below) */
3238 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3239 break;
3240 }
3241 }
3242 else
3243 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3244
3245 /* Update the counter if we're removing references. */
3246 if (!u64AndMask)
3247 {
3248 Assert(pPage->cPresent);
3249 Assert(pPool->cPresent);
3250 pPage->cPresent--;
3251 pPool->cPresent--;
3252 }
3253
3254 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3255 {
3256 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3257 X86PTEPAE Pte;
3258 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3259 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3260 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3261
3262 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3263 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3264 return fRet;
3265 }
3266#ifdef LOG_ENABLED
3267 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3268 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3269 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3270 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3271 Log(("i=%d cFound=%d\n", i, ++cFound));
3272#endif
3273 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3274 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3275 break;
3276 }
3277
3278#ifdef PGM_WITH_LARGE_PAGES
3279 /* Large page case only. */
3280 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3281 {
3282 Assert(pVM->pgm.s.fNestedPaging);
3283
3284 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3285 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3286
3287 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3288 {
3289 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3290 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3291 pPD->a[iPte].u = 0;
3292 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3293
3294 /* Update the counter as we're removing references. */
3295 Assert(pPage->cPresent);
3296 Assert(pPool->cPresent);
3297 pPage->cPresent--;
3298 pPool->cPresent--;
3299
3300 return fRet;
3301 }
3302# ifdef LOG_ENABLED
3303 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3304 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3305 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3306 Log(("i=%d cFound=%d\n", i, ++cFound));
3307# endif
3308 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3309 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3310 break;
3311 }
3312
3313 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3314 case PGMPOOLKIND_PAE_PD_PHYS:
3315 {
3316 Assert(pVM->pgm.s.fNestedPaging);
3317
3318 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3319 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3320
3321 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3322 {
3323 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3324 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3325 pPD->a[iPte].u = 0;
3326 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3327
3328 /* Update the counter as we're removing references. */
3329 Assert(pPage->cPresent);
3330 Assert(pPool->cPresent);
3331 pPage->cPresent--;
3332 pPool->cPresent--;
3333 return fRet;
3334 }
3335# ifdef LOG_ENABLED
3336 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3337 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3338 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3339 Log(("i=%d cFound=%d\n", i, ++cFound));
3340# endif
3341 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3342 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3343 break;
3344 }
3345#endif /* PGM_WITH_LARGE_PAGES */
3346
3347 default:
3348 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3349 }
3350
3351 /* not reached. */
3352#ifndef _MSC_VER
3353 return fRet;
3354#endif
3355}
3356
3357
3358/**
3359 * Scans one shadow page table for mappings of a physical page.
3360 *
3361 * @param pVM The cross context VM structure.
3362 * @param pPhysPage The guest page in question.
3363 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3364 * @param iShw The shadow page table.
3365 */
3366static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3367{
3368 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3369
3370    /* We should only come here when there's only one reference to this physical page. */
3371 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3372
3373 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3374 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3375 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3376 if (!fKeptPTEs)
3377 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3378 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3379}
3380
3381
3382/**
3383 * Flushes a list of shadow page tables mapping the same physical page.
3384 *
3385 * @param pVM The cross context VM structure.
3386 * @param pPhysPage The guest page in question.
3387 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3388 * @param iPhysExt The physical cross reference extent list to flush.
3389 */
3390static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3391{
3392 PGM_LOCK_ASSERT_OWNER(pVM);
3393 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3394 bool fKeepList = false;
3395
3396 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3397 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3398
3399 const uint16_t iPhysExtStart = iPhysExt;
3400 PPGMPOOLPHYSEXT pPhysExt;
3401 do
3402 {
3403 Assert(iPhysExt < pPool->cMaxPhysExts);
3404 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3405 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3406 {
3407 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3408 {
3409 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3410 if (!fKeptPTEs)
3411 {
3412 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3413 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3414 }
3415 else
3416 fKeepList = true;
3417 }
3418 }
3419 /* next */
3420 iPhysExt = pPhysExt->iNext;
3421 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3422
3423 if (!fKeepList)
3424 {
3425 /* insert the list into the free list and clear the ram range entry. */
3426 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3427 pPool->iPhysExtFreeHead = iPhysExtStart;
3428 /* Invalidate the tracking data. */
3429 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3430 }
3431
3432 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3433}
3434
3435
3436/**
3437 * Flushes all shadow page table mappings of the given guest page.
3438 *
3439 * This is typically called when the host page backing the guest one has been
3440 * replaced or when the page protection was changed due to a guest access
3441 * caught by the monitoring.
3442 *
3443 * @returns VBox status code.
3444 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3445 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3446 * pool cleaning. FF and sync flags are set.
3447 *
3448 * @param pVM The cross context VM structure.
3449 * @param GCPhysPage GC physical address of the page in question
3450 * @param pPhysPage The guest page in question.
3451 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3452 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3453 *                      flushed; it is NOT touched if this isn't necessary.
3454 *                      The caller MUST initialize this to @a false.
3455 */
3456int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3457{
3458 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3459 PGM_LOCK_VOID(pVM);
3460 int rc = VINF_SUCCESS;
3461
3462#ifdef PGM_WITH_LARGE_PAGES
3463 /* Is this page part of a large page? */
3464 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3465 {
3466 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3467 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3468
3469 /* Fetch the large page base. */
3470 PPGMPAGE pLargePage;
3471 if (GCPhysBase != GCPhysPage)
3472 {
3473 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3474 AssertFatal(pLargePage);
3475 }
3476 else
3477 pLargePage = pPhysPage;
3478
3479 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3480
3481 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3482 {
3483 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3484 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3485 pVM->pgm.s.cLargePagesDisabled++;
3486
3487 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3488 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3489
3490 *pfFlushTLBs = true;
3491 PGM_UNLOCK(pVM);
3492 return rc;
3493 }
3494 }
3495#else
3496 NOREF(GCPhysPage);
3497#endif /* PGM_WITH_LARGE_PAGES */
3498
3499 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3500 if (u16)
3501 {
3502 /*
3503 * The zero page is currently screwing up the tracking and we'll
3504 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3505 * is defined, zero pages won't normally be mapped. Some kind of solution
3506 * will be needed for this problem of course, but it will have to wait...
3507 */
3508 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3509 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3510 rc = VINF_PGM_GCPHYS_ALIASED;
3511 else
3512 {
3513 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3514 {
3515 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3516 pgmPoolTrackFlushGCPhysPT(pVM,
3517 pPhysPage,
3518 fFlushPTEs,
3519 PGMPOOL_TD_GET_IDX(u16));
3520 }
3521 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3522 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3523 else
3524 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3525 *pfFlushTLBs = true;
3526 }
3527 }
3528
3529 if (rc == VINF_PGM_GCPHYS_ALIASED)
3530 {
3531 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3532 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3533 rc = VINF_PGM_SYNC_CR3;
3534 }
3535 PGM_UNLOCK(pVM);
3536 return rc;
3537}
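
/*
 * Illustrative sketch only -- not an actual call site in this file.  It shows how a
 * caller that has just replaced the host page backing a guest page might use the
 * function above; the page lookup and status handling are assumptions for the example.
 */
#if 0
    bool     fFlushTLBs = false;                    /* MUST be initialized to false (see the docs above). */
    PPGMPAGE pPhysPage  = pgmPhysGetPage(pVM, GCPhysPage);
    int rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysPage, pPhysPage, true /*fFlushPTEs*/, &fFlushTLBs);
    if (fFlushTLBs)
        PGM_INVL_ALL_VCPU_TLBS(pVM);
    /* rc may be VINF_PGM_SYNC_CR3; the FF and sync flags have then already been set. */
#endif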
3538
3539
3540/**
3541 * Scans all shadow page tables for mappings of a physical page.
3542 *
3543 * This may be slow, but it's most likely more efficient than cleaning
3544 * out the entire page pool / cache.
3545 *
3546 * @returns VBox status code.
3547 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3548 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3549 * a page pool cleaning.
3550 *
3551 * @param pVM The cross context VM structure.
3552 * @param pPhysPage The guest page in question.
3553 */
3554int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3555{
3556 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3557 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3558 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3559 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3560
3561 /*
3562 * There is a limit to what makes sense.
3563 */
3564 if ( pPool->cPresent > 1024
3565 && pVM->cCpus == 1)
3566 {
3567 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3568 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3569 return VINF_PGM_GCPHYS_ALIASED;
3570 }
3571
3572 /*
3573     * Iterate all the pages until we've encountered all that are in use.
3574     * This is a simple but not quite optimal solution.
3575 */
3576 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3577 unsigned cLeft = pPool->cUsedPages;
3578 unsigned iPage = pPool->cCurPages;
3579 while (--iPage >= PGMPOOL_IDX_FIRST)
3580 {
3581 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3582 if ( pPage->GCPhys != NIL_RTGCPHYS
3583 && pPage->cPresent)
3584 {
3585 switch (pPage->enmKind)
3586 {
3587 /*
3588 * We only care about shadow page tables.
3589 */
3590 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3591 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3592 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3593 {
3594 const uint32_t u32 = (uint32_t)u64;
3595 unsigned cPresent = pPage->cPresent;
3596 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3597 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3598 {
3599 const X86PGUINT uPte = pPT->a[i].u;
3600 if (uPte & X86_PTE_P)
3601 {
3602 if ((uPte & X86_PTE_PG_MASK) == u32)
3603 {
3604 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3605 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3606
3607 /* Update the counter as we're removing references. */
3608 Assert(pPage->cPresent);
3609 Assert(pPool->cPresent);
3610 pPage->cPresent--;
3611 pPool->cPresent--;
3612 }
3613 if (!--cPresent)
3614 break;
3615 }
3616 }
3617 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3618 break;
3619 }
3620
3621 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3622 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3623 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3624 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3625 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3626 {
3627 unsigned cPresent = pPage->cPresent;
3628 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3629 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3630 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3631 {
3632 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3633 {
3634 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3635 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3636
3637 /* Update the counter as we're removing references. */
3638 Assert(pPage->cPresent);
3639 Assert(pPool->cPresent);
3640 pPage->cPresent--;
3641 pPool->cPresent--;
3642 }
3643 if (!--cPresent)
3644 break;
3645 }
3646 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3647 break;
3648 }
3649
3650 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3651 {
3652 unsigned cPresent = pPage->cPresent;
3653 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3654 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3655 {
3656 X86PGPAEUINT const uPte = pPT->a[i].u;
3657 if (uPte & EPT_E_READ)
3658 {
3659 if ((uPte & EPT_PTE_PG_MASK) == u64)
3660 {
3661 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3662 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3663
3664 /* Update the counter as we're removing references. */
3665 Assert(pPage->cPresent);
3666 Assert(pPool->cPresent);
3667 pPage->cPresent--;
3668 pPool->cPresent--;
3669 }
3670 if (!--cPresent)
3671 break;
3672 }
3673 }
3674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3675 break;
3676 }
3677 }
3678
3679 if (!--cLeft)
3680 break;
3681 }
3682 }
3683
3684 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3685 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3686
3687 /*
3688 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3689 */
3690 if (pPool->cPresent > 1024)
3691 {
3692 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3693 return VINF_PGM_GCPHYS_ALIASED;
3694 }
3695
3696 return VINF_SUCCESS;
3697}
3698
3699
3700/**
3701 * Clears the user entry in a user table.
3702 *
3703 * This is used to remove all references to a page when flushing it.
3704 */
3705static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3706{
3707 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3708 Assert(pUser->iUser < pPool->cCurPages);
3709 uint32_t iUserTable = pUser->iUserTable;
3710
3711 /*
3712 * Map the user page. Ignore references made by fictitious pages.
3713 */
3714 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3715 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3716 union
3717 {
3718 uint64_t *pau64;
3719 uint32_t *pau32;
3720 } u;
3721 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3722 {
3723 Assert(!pUserPage->pvPageR3);
3724 return;
3725 }
3726 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3727
3728
3729 /* Safety precaution in case we change the paging for other modes too in the future. */
3730 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3731
3732#ifdef VBOX_STRICT
3733 /*
3734 * Some sanity checks.
3735 */
3736 switch (pUserPage->enmKind)
3737 {
3738 case PGMPOOLKIND_32BIT_PD:
3739 case PGMPOOLKIND_32BIT_PD_PHYS:
3740 Assert(iUserTable < X86_PG_ENTRIES);
3741 break;
3742 case PGMPOOLKIND_PAE_PDPT:
3743 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3744 case PGMPOOLKIND_PAE_PDPT_PHYS:
3745 Assert(iUserTable < 4);
3746 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3747 break;
3748 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3749 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3750 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3751 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3752 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3753 case PGMPOOLKIND_PAE_PD_PHYS:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 break;
3756 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3757 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3758 break;
3759 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3760 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3761 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3762 break;
3763 case PGMPOOLKIND_64BIT_PML4:
3764 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3765 /* GCPhys >> PAGE_SHIFT is the index here */
3766 break;
3767 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3768 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3769 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3770 break;
3771
3772 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3773 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3774 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3775 break;
3776
3777 case PGMPOOLKIND_ROOT_NESTED:
3778 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3779 break;
3780
3781 default:
3782 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3783 break;
3784 }
3785#endif /* VBOX_STRICT */
3786
3787 /*
3788 * Clear the entry in the user page.
3789 */
3790 switch (pUserPage->enmKind)
3791 {
3792 /* 32-bit entries */
3793 case PGMPOOLKIND_32BIT_PD:
3794 case PGMPOOLKIND_32BIT_PD_PHYS:
3795 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3796 break;
3797
3798 /* 64-bit entries */
3799 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3800 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3801 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3802 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3803 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3804 case PGMPOOLKIND_PAE_PD_PHYS:
3805 case PGMPOOLKIND_PAE_PDPT_PHYS:
3806 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3807 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3808 case PGMPOOLKIND_64BIT_PML4:
3809 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3810 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3811 case PGMPOOLKIND_PAE_PDPT:
3812 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3813 case PGMPOOLKIND_ROOT_NESTED:
3814 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3815 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3816 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3817 break;
3818
3819 default:
3820 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3821 }
3822 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3823}
3824
3825
3826/**
3827 * Clears all users of a page.
3828 */
3829static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3830{
3831 /*
3832 * Free all the user records.
3833 */
3834 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3835
3836 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3837 uint16_t i = pPage->iUserHead;
3838 while (i != NIL_PGMPOOL_USER_INDEX)
3839 {
3840        /* Clear the entry in the user table. */
3841 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3842
3843 /* Free it. */
3844 const uint16_t iNext = paUsers[i].iNext;
3845 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3846 paUsers[i].iNext = pPool->iUserFreeHead;
3847 pPool->iUserFreeHead = i;
3848
3849 /* Next. */
3850 i = iNext;
3851 }
3852 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3853}
3854
3855
3856/**
3857 * Allocates a new physical cross reference extent.
3858 *
3859 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3860 * @param pVM The cross context VM structure.
3861 * @param piPhysExt Where to store the phys ext index.
3862 */
3863PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3864{
3865 PGM_LOCK_ASSERT_OWNER(pVM);
3866 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3867 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3868 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3869 {
3870 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3871 return NULL;
3872 }
3873 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3874 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3875 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3876 *piPhysExt = iPhysExt;
3877 return pPhysExt;
3878}
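
/*
 * Illustrative sketch only -- not an actual call site.  It shows the expected
 * alloc/initialize/free pairing for an extent; the caller is assumed to own the
 * PGM lock, and iShwPT/iPte are made-up values for the example.
 */
#if 0
    uint16_t        iPhysExt;
    PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
    if (pPhysExt)
    {
        pPhysExt->aidx[0] = iShwPT;     /* shadow page pool index */
        pPhysExt->apte[0] = iPte;       /* shadow PTE index */
        /* ... use the extent ... */
        pgmPoolTrackPhysExtFree(pVM, iPhysExt);
    }
#endif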
3879
3880
3881/**
3882 * Frees a physical cross reference extent.
3883 *
3884 * @param pVM The cross context VM structure.
3885 * @param iPhysExt The extent to free.
3886 */
3887void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3888{
3889 PGM_LOCK_ASSERT_OWNER(pVM);
3890 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3891 Assert(iPhysExt < pPool->cMaxPhysExts);
3892 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3893 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3894 {
3895 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3896 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3897 }
3898 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3899 pPool->iPhysExtFreeHead = iPhysExt;
3900}
3901
3902
3903/**
3904 * Frees a list of physical cross reference extents.
3905 *
3906 * @param pVM The cross context VM structure.
3907 * @param   iPhysExt    The index of the first extent in the list to free.
3908 */
3909void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3910{
3911 PGM_LOCK_ASSERT_OWNER(pVM);
3912 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3913
3914 const uint16_t iPhysExtStart = iPhysExt;
3915 PPGMPOOLPHYSEXT pPhysExt;
3916 do
3917 {
3918 Assert(iPhysExt < pPool->cMaxPhysExts);
3919 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3920 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3921 {
3922 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3923 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3924 }
3925
3926 /* next */
3927 iPhysExt = pPhysExt->iNext;
3928 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3929
3930 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3931 pPool->iPhysExtFreeHead = iPhysExtStart;
3932}
3933
3934
3935/**
3936 * Insert a reference into a list of physical cross reference extents.
3937 *
3938 * @returns The new tracking data for PGMPAGE.
3939 *
3940 * @param pVM The cross context VM structure.
3941 * @param iPhysExt The physical extent index of the list head.
3942 * @param iShwPT The shadow page table index.
3943 * @param iPte Page table entry
3944 *
3945 */
3946static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3947{
3948 PGM_LOCK_ASSERT_OWNER(pVM);
3949 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3950 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3951
3952 /*
3953 * Special common cases.
3954 */
3955 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3956 {
3957 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3958 paPhysExts[iPhysExt].apte[1] = iPte;
3959 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3960 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3961 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3962 }
3963 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3964 {
3965 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3966 paPhysExts[iPhysExt].apte[2] = iPte;
3967 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3968 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3969 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3970 }
3971 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3972
3973 /*
3974 * General treatment.
3975 */
3976 const uint16_t iPhysExtStart = iPhysExt;
3977 unsigned cMax = 15;
3978 for (;;)
3979 {
3980 Assert(iPhysExt < pPool->cMaxPhysExts);
3981 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3982 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3983 {
3984 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3985 paPhysExts[iPhysExt].apte[i] = iPte;
3986 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3987 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3988 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3989 }
3990 if (!--cMax)
3991 {
3992 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
3993 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3994 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3995 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3996 }
3997
3998 /* advance */
3999 iPhysExt = paPhysExts[iPhysExt].iNext;
4000 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4001 break;
4002 }
4003
4004 /*
4005 * Add another extent to the list.
4006 */
4007 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4008 if (!pNew)
4009 {
4010 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4011 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4012 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4013 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4014 }
4015 pNew->iNext = iPhysExtStart;
4016 pNew->aidx[0] = iShwPT;
4017 pNew->apte[0] = iPte;
4018 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4019 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4020}
4021
4022
4023/**
4024 * Add a reference to a guest physical page where extents are in use.
4025 *
4026 * @returns The new tracking data for PGMPAGE.
4027 *
4028 * @param pVM The cross context VM structure.
4029 * @param pPhysPage Pointer to the aPages entry in the ram range.
4030 * @param u16 The ram range flags (top 16-bits).
4031 * @param iShwPT The shadow page table index.
4032 * @param iPte Page table entry
4033 */
4034uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4035{
4036 PGM_LOCK_VOID(pVM);
4037 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4038 {
4039 /*
4040 * Convert to extent list.
4041 */
4042 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4043 uint16_t iPhysExt;
4044 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4045 if (pPhysExt)
4046 {
4047 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4048 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4049 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4050 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4051 pPhysExt->aidx[1] = iShwPT;
4052 pPhysExt->apte[1] = iPte;
4053 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4054 }
4055 else
4056 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4057 }
4058 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4059 {
4060 /*
4061 * Insert into the extent list.
4062 */
4063 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4064 }
4065 else
4066 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4067 PGM_UNLOCK(pVM);
4068 return u16;
4069}
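
/*
 * Illustrative sketch only -- not an actual call site.  It shows how the returned
 * tracking word is meant to flow back into the PGMPAGE entry; iShwPT and iPte are
 * assumptions for the example.
 */
#if 0
    uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, iShwPT, iPte);
    PGM_PAGE_SET_TRACKING(pVM, pPhysPage, u16);
#endif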
4070
4071
4072/**
4073 * Clear references to guest physical memory.
4074 *
4075 * @param pPool The pool.
4076 * @param pPage The page.
4077 * @param pPhysPage Pointer to the aPages entry in the ram range.
4078 * @param iPte Shadow PTE index
4079 */
4080void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4081{
4082 PVMCC pVM = pPool->CTX_SUFF(pVM);
4083 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4084 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4085
4086 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4087 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4088 {
4089 PGM_LOCK_VOID(pVM);
4090
4091 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4092 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4093 do
4094 {
4095 Assert(iPhysExt < pPool->cMaxPhysExts);
4096
4097 /*
4098 * Look for the shadow page and check if it's all freed.
4099 */
4100 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4101 {
4102 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4103 && paPhysExts[iPhysExt].apte[i] == iPte)
4104 {
4105 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4106 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4107
4108 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4109 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4110 {
4111 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4112 PGM_UNLOCK(pVM);
4113 return;
4114 }
4115
4116 /* we can free the node. */
4117 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4118 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4119 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4120 {
4121 /* lonely node */
4122 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4123 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4124 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4125 }
4126 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4127 {
4128 /* head */
4129 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4130 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4131 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4132 }
4133 else
4134 {
4135 /* in list */
4136 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4137 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4138 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4139 }
4140 iPhysExt = iPhysExtNext;
4141 PGM_UNLOCK(pVM);
4142 return;
4143 }
4144 }
4145
4146 /* next */
4147 iPhysExtPrev = iPhysExt;
4148 iPhysExt = paPhysExts[iPhysExt].iNext;
4149 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4150
4151 PGM_UNLOCK(pVM);
4152 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4153 }
4154 else /* nothing to do */
4155 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4156}
4157
4158/**
4159 * Clear references to guest physical memory.
4160 *
4161 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4162 * physical address is assumed to be correct, so the linear search can be
4163 * skipped and we can assert at an earlier point.
4164 *
4165 * @param pPool The pool.
4166 * @param pPage The page.
4167 * @param HCPhys The host physical address corresponding to the guest page.
4168 * @param GCPhys The guest physical address corresponding to HCPhys.
4169 * @param iPte Shadow PTE index
4170 */
4171static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4172{
4173 /*
4174 * Lookup the page and check if it checks out before derefing it.
4175 */
4176 PVMCC pVM = pPool->CTX_SUFF(pVM);
4177 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4178 if (pPhysPage)
4179 {
4180 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4181#ifdef LOG_ENABLED
4182 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4183 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4184#endif
4185 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4186 {
4187 Assert(pPage->cPresent);
4188 Assert(pPool->cPresent);
4189 pPage->cPresent--;
4190 pPool->cPresent--;
4191 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4192 return;
4193 }
4194
4195 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4196 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4197 }
4198 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4199}
4200
4201
4202/**
4203 * Clear references to guest physical memory.
4204 *
4205 * @param pPool The pool.
4206 * @param pPage The page.
4207 * @param HCPhys The host physical address corresponding to the guest page.
4208 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4209 * @param iPte Shadow pte index
4210 */
4211void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4212{
4213 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4214
4215 /*
4216 * Try the hint first.
4217 */
4218 RTHCPHYS HCPhysHinted;
4219 PVMCC pVM = pPool->CTX_SUFF(pVM);
4220 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4221 if (pPhysPage)
4222 {
4223 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4224 Assert(HCPhysHinted);
4225 if (HCPhysHinted == HCPhys)
4226 {
4227 Assert(pPage->cPresent);
4228 Assert(pPool->cPresent);
4229 pPage->cPresent--;
4230 pPool->cPresent--;
4231 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4232 return;
4233 }
4234 }
4235 else
4236 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4237
4238 /*
4239 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4240 */
4241 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4242 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4243 while (pRam)
4244 {
4245 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4246 while (iPage-- > 0)
4247 {
4248 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4249 {
4250 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4251 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4252 Assert(pPage->cPresent);
4253 Assert(pPool->cPresent);
4254 pPage->cPresent--;
4255 pPool->cPresent--;
4256 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4257 return;
4258 }
4259 }
4260 pRam = pRam->CTX_SUFF(pNext);
4261 }
4262
4263 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4264}
4265
4266
4267/**
4268 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4269 *
4270 * @param pPool The pool.
4271 * @param pPage The page.
4272 * @param pShwPT The shadow page table (mapping of the page).
4273 * @param pGstPT The guest page table.
4274 */
4275DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4276{
4277 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4278 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4279 {
4280 const X86PGUINT uPte = pShwPT->a[i].u;
4281 Assert(!(uPte & RT_BIT_32(10)));
4282 if (uPte & X86_PTE_P)
4283 {
4284 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4285 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4286 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4287 if (!pPage->cPresent)
4288 break;
4289 }
4290 }
4291}
4292
4293
4294/**
4295 * Clear references to guest physical memory in a PAE / 32-bit page table.
4296 *
4297 * @param pPool The pool.
4298 * @param pPage The page.
4299 * @param pShwPT The shadow page table (mapping of the page).
4300 * @param pGstPT The guest page table (just a half one).
4301 */
4302DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4303{
4304 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4305 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4306 {
4307 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4308 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4309 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4310 {
4311 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4312 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4313 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4314 if (!pPage->cPresent)
4315 break;
4316 }
4317 }
4318}
4319
4320
4321/**
4322 * Clear references to guest physical memory in a PAE / PAE page table.
4323 *
4324 * @param pPool The pool.
4325 * @param pPage The page.
4326 * @param pShwPT The shadow page table (mapping of the page).
4327 * @param pGstPT The guest page table.
4328 */
4329DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4330{
4331 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4332 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4333 {
4334 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4335 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4336 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4337 {
4338            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4339 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4340 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4341 if (!pPage->cPresent)
4342 break;
4343 }
4344 }
4345}
4346
4347
4348/**
4349 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4350 *
4351 * @param pPool The pool.
4352 * @param pPage The page.
4353 * @param pShwPT The shadow page table (mapping of the page).
4354 */
4355DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4356{
4357 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4358 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4359 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4360 {
4361 const X86PGUINT uPte = pShwPT->a[i].u;
4362 Assert(!(uPte & RT_BIT_32(10)));
4363 if (uPte & X86_PTE_P)
4364 {
4365 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4366 i, uPte & X86_PTE_PG_MASK, GCPhys));
4367 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4368 if (!pPage->cPresent)
4369 break;
4370 }
4371 }
4372}
4373
4374
4375/**
4376 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4377 *
4378 * @param pPool The pool.
4379 * @param pPage The page.
4380 * @param pShwPT The shadow page table (mapping of the page).
4381 */
4382DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4383{
4384 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4385 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4386 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4387 {
4388 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4389 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4390 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4391 {
4392 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4393 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4394 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4395 if (!pPage->cPresent)
4396 break;
4397 }
4398 }
4399}
4400
4401
4402/**
4403 * Clear references to shadowed pages in an EPT page table.
4404 *
4405 * @param pPool The pool.
4406 * @param pPage The page.
4407 * @param   pShwPT      The shadow page table (mapping of the
4408 * page).
4409 */
4410DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4411{
4412 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4413 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4414 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4415 {
4416 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4417 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4418 if (uPte & EPT_E_READ)
4419 {
4420 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4421 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4422 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4423 if (!pPage->cPresent)
4424 break;
4425 }
4426 }
4427}
4428
4429
4430/**
4431 * Clear references to shadowed pages in a 32-bit page directory.
4432 *
4433 * @param pPool The pool.
4434 * @param pPage The page.
4435 * @param pShwPD The shadow page directory (mapping of the page).
4436 */
4437DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4438{
4439 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4440 {
4441 X86PGUINT const uPde = pShwPD->a[i].u;
4442 if (uPde & X86_PDE_P)
4443 {
4444 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4445 if (pSubPage)
4446 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4447 else
4448 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4449 }
4450 }
4451}
4452
4453
4454/**
4455 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4456 *
4457 * @param pPool The pool.
4458 * @param pPage The page.
4459 * @param pShwPD The shadow page directory (mapping of the page).
4460 */
4461DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4462{
4463 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4464 {
4465 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4466 if (uPde & X86_PDE_P)
4467 {
4468#ifdef PGM_WITH_LARGE_PAGES
4469 if (uPde & X86_PDE_PS)
4470 {
4471 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4472 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4473 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4474 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4475 i);
4476 }
4477 else
4478#endif
4479 {
4480 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4481 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4482 if (pSubPage)
4483 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4484 else
4485 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4486 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4487 }
4488 }
4489 }
4490}
4491
4492
4493/**
4494 * Clear references to shadowed pages in a PAE page directory pointer table.
4495 *
4496 * @param pPool The pool.
4497 * @param pPage The page.
4498 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4499 */
4500DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4501{
4502 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4503 {
4504 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4505 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4506 if (uPdpe & X86_PDPE_P)
4507 {
4508 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4509 if (pSubPage)
4510 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4511 else
4512 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4513 }
4514 }
4515}
4516
4517
4518/**
4519 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4520 *
4521 * @param pPool The pool.
4522 * @param pPage The page.
4523 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4524 */
4525DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4526{
4527 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4528 {
4529 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4530 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4531 if (uPdpe & X86_PDPE_P)
4532 {
4533 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4534 if (pSubPage)
4535 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4536 else
4537 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4538 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4539 }
4540 }
4541}
4542
4543
4544/**
4545 * Clear references to shadowed pages in a 64-bit level 4 page table.
4546 *
4547 * @param pPool The pool.
4548 * @param pPage The page.
4549 * @param   pShwPML4    The shadow PML4 (mapping of the page).
4550 */
4551DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4552{
4553 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4554 {
4555 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4556 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4557 if (uPml4e & X86_PML4E_P)
4558 {
4559 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4560 if (pSubPage)
4561 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4562 else
4563 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4564 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4565 }
4566 }
4567}
4568
4569
4570/**
4571 * Clear references to shadowed pages in an EPT page directory.
4572 *
4573 * @param pPool The pool.
4574 * @param pPage The page.
4575 * @param pShwPD The shadow page directory (mapping of the page).
4576 */
4577DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4578{
4579 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4580 {
4581 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4582 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4583 if (uPde & EPT_E_READ)
4584 {
4585#ifdef PGM_WITH_LARGE_PAGES
4586 if (uPde & EPT_E_LEAF)
4587 {
4588 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4589 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4590 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4591 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4592 i);
4593 }
4594 else
4595#endif
4596 {
4597 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4598 if (pSubPage)
4599 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4600 else
4601 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4602 }
4603 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4604 }
4605 }
4606}
4607
4608
4609/**
4610 * Clear references to shadowed pages in an EPT page directory pointer table.
4611 *
4612 * @param pPool The pool.
4613 * @param pPage The page.
4614 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4615 */
4616DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4617{
4618 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4619 {
4620 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4621 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4622 if (uPdpe & EPT_E_READ)
4623 {
4624 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4625 if (pSubPage)
4626 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4627 else
4628 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4629 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4630 }
4631 }
4632}
4633
4634
4635/**
4636 * Clears all references made by this page.
4637 *
4638 * This includes other shadow pages and GC physical addresses.
4639 *
4640 * @param pPool The pool.
4641 * @param pPage The page.
4642 */
4643static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4644{
4645 /*
4646 * Map the shadow page and take action according to the page kind.
4647 */
4648 PVMCC pVM = pPool->CTX_SUFF(pVM);
4649 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4650 switch (pPage->enmKind)
4651 {
4652 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4653 {
4654 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4655 void *pvGst;
4656 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4657 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4658 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4659 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4660 break;
4661 }
4662
4663 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4664 {
4665 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4666 void *pvGst;
4667 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4668 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4669 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4670 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4671 break;
4672 }
4673
4674 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4675 {
4676 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4677 void *pvGst;
4678 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4679 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4680 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4681 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4682 break;
4683 }
4684
4685 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4686 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4687 {
4688 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4689 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4690 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4691 break;
4692 }
4693
4694 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4695 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4696 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4697 {
4698 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4699 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4700 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4701 break;
4702 }
4703
4704 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4705 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4706 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4707 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4708 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4709 case PGMPOOLKIND_PAE_PD_PHYS:
4710 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4711 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4712 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4713 break;
4714
4715 case PGMPOOLKIND_32BIT_PD_PHYS:
4716 case PGMPOOLKIND_32BIT_PD:
4717 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4718 break;
4719
4720 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4721 case PGMPOOLKIND_PAE_PDPT:
4722 case PGMPOOLKIND_PAE_PDPT_PHYS:
4723 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4724 break;
4725
4726 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4727 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4728 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4729 break;
4730
4731 case PGMPOOLKIND_64BIT_PML4:
4732 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4733 break;
4734
4735 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4736 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4737 break;
4738
4739 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4740 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4741 break;
4742
4743 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4744 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4745 break;
4746
4747 default:
4748 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4749 }
4750
4751    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4752 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4753 ASMMemZeroPage(pvShw);
4754 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4755 pPage->fZeroed = true;
4756 Assert(!pPage->cPresent);
4757 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4758}
4759
4760
4761/**
4762 * Flushes a pool page.
4763 *
4764 * This moves the page to the free list after removing all user references to it.
4765 *
4766 * @returns VBox status code.
4767 * @retval VINF_SUCCESS on success.
4768 * @param pPool The pool.
4769 * @param pPage The shadow page.
4770 * @param   fFlush      Flush the TLBs when required (should only be false in very specific use cases!!)
4771 */
4772int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4773{
4774 PVMCC pVM = pPool->CTX_SUFF(pVM);
4775 bool fFlushRequired = false;
4776
4777 int rc = VINF_SUCCESS;
4778 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4779 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4780 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4781
4782 /*
4783 * Reject any attempts at flushing any of the special root pages (shall
4784 * not happen).
4785 */
4786 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4787 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4788 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4789 VINF_SUCCESS);
4790
4791 PGM_LOCK_VOID(pVM);
4792
4793 /*
4794 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4795 */
4796 if (pgmPoolIsPageLocked(pPage))
4797 {
4798 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4799 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4800 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4801 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4802 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4803 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4804 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4805 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4806 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4807 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4808 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4809 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4810 PGM_UNLOCK(pVM);
4811 return VINF_SUCCESS;
4812 }
4813
4814 /*
4815 * Mark the page as being in need of an ASMMemZeroPage().
4816 */
4817 pPage->fZeroed = false;
4818
4819#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
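    /* If the optimized dirty-PT tracking has pending state for this page, flush it first. */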
4820 if (pPage->fDirty)
4821 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4822#endif
4823
4824 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4825 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4826 fFlushRequired = true;
4827
4828 /*
4829 * Clear the page.
4830 */
4831 pgmPoolTrackClearPageUsers(pPool, pPage);
4832 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4833 pgmPoolTrackDeref(pPool, pPage);
4834 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4835
4836 /*
4837 * Flush it from the cache.
4838 */
4839 pgmPoolCacheFlushPage(pPool, pPage);
4840
4841 /*
4842 * Deregister the monitoring.
4843 */
4844 if (pPage->fMonitored)
4845 rc = pgmPoolMonitorFlush(pPool, pPage);
4846
4847 /*
4848 * Free the page.
4849 */
4850 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4851 pPage->iNext = pPool->iFreeHead;
4852 pPool->iFreeHead = pPage->idx;
4853 pPage->enmKind = PGMPOOLKIND_FREE;
4854 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4855 pPage->GCPhys = NIL_RTGCPHYS;
4856 pPage->fReusedFlushPending = false;
4857
4858 pPool->cUsedPages--;
4859
4860 /* Flush the TLBs of all VCPUs if required. */
4861 if ( fFlushRequired
4862 && fFlush)
4863 {
4864 PGM_INVL_ALL_VCPU_TLBS(pVM);
4865 }
4866
4867 PGM_UNLOCK(pVM);
4868 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4869 return rc;
4870}
4871
4872
4873/**
4874 * Frees a usage of a pool page.
4875 *
4876 * The caller is responsible for updating the user table so that it no longer
4877 * references the shadow page.
4878 *
4879 * @param pPool The pool.
4880 * @param pPage The shadow page.
4881 * @param iUser The shadow page pool index of the user table.
4882 * NIL_PGMPOOL_IDX for root pages.
4883 * @param iUserTable The index into the user table (shadowed). Ignored if
4884 * root page.
4885 */
4886void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4887{
4888 PVMCC pVM = pPool->CTX_SUFF(pVM);
4889
4890 STAM_PROFILE_START(&pPool->StatFree, a);
4891 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4892 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4893 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4894
4895 PGM_LOCK_VOID(pVM);
4896 if (iUser != NIL_PGMPOOL_IDX)
4897 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4898 if (!pPage->fCached)
4899 pgmPoolFlushPage(pPool, pPage);
4900 PGM_UNLOCK(pVM);
4901 STAM_PROFILE_STOP(&pPool->StatFree, a);
4902}
4903
4904
4905/**
4906 * Makes one or more pages free.
4907 *
4908 * @returns VBox status code.
4909 * @retval VINF_SUCCESS on success.
4910 *
4911 * @param pPool The pool.
4912 * @param enmKind Page table kind
4913 * @param iUser The user of the page.
4914 */
4915static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4916{
4917 PVMCC pVM = pPool->CTX_SUFF(pVM);
4918 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4919 NOREF(enmKind);
4920
4921 /*
4922 * If the pool isn't fully grown yet, expand it.
4923 */
4924 if (pPool->cCurPages < pPool->cMaxPages)
4925 {
4926 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4927#ifdef IN_RING3
4928 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4929#else
4930 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4931#endif
4932 if (RT_FAILURE(rc))
4933 return rc;
4934 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
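        /* If growing didn't actually produce a free page, fall back to evicting a cached page below. */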
4935 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4936 return VINF_SUCCESS;
4937 }
4938
4939 /*
4940 * Free one cached page.
4941 */
4942 return pgmPoolCacheFreeOne(pPool, iUser);
4943}
4944
4945
4946/**
4947 * Allocates a page from the pool.
4948 *
4949 * This page may actually be a cached page and not in need of any processing
4950 * on the caller's part.
4951 *
4952 * @returns VBox status code.
4953 * @retval VINF_SUCCESS if a NEW page was allocated.
4954 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4955 *
4956 * @param pVM The cross context VM structure.
4957 * @param GCPhys The GC physical address of the page we're going to shadow.
4958 * For 4MB and 2MB PD entries, it's the first address the
4959 * shadow PT is covering.
4960 * @param enmKind The kind of mapping.
4961 * @param enmAccess Access type for the mapping (only relevant for big pages)
4962 * @param fA20Enabled Whether the A20 gate is enabled or not.
4963 * @param iUser The shadow page pool index of the user table. Root
4964 * pages should pass NIL_PGMPOOL_IDX.
4965 * @param iUserTable The index into the user table (shadowed). Ignored for
4966 * root pages (iUser == NIL_PGMPOOL_IDX).
4967 * @param fLockPage Lock the page
4968 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
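 *
 * @remarks Illustrative call sequence (a sketch only; GCPhysPt, pShwPde and
 *          iPdDst are placeholder names, not taken from any actual caller):
 * @code
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                           true, pShwPde->idx, iPdDst, false, &pShwPage);
 *     if (RT_SUCCESS(rc) && rc != VINF_PGM_CACHED_PAGE)
 *     {
 *         // A brand new (zeroed) shadow page table; the caller fills in the entries.
 *     }
 * @endcode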
4969 */
4970int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4971 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4972{
4973 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4974 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4975 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4976 *ppPage = NULL;
4977 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4978 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4979 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4980
4981 PGM_LOCK_VOID(pVM);
4982
4983 if (pPool->fCacheEnabled)
4984 {
4985 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4986 if (RT_SUCCESS(rc2))
4987 {
4988 if (fLockPage)
4989 pgmPoolLockPage(pPool, *ppPage);
4990 PGM_UNLOCK(pVM);
4991 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4992 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4993 return rc2;
4994 }
4995 }
4996
4997 /*
4998 * Allocate a new one.
4999 */
5000 int rc = VINF_SUCCESS;
5001 uint16_t iNew = pPool->iFreeHead;
5002 if (iNew == NIL_PGMPOOL_IDX)
5003 {
5004 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5005 if (RT_FAILURE(rc))
5006 {
5007 PGM_UNLOCK(pVM);
5008 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5009 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5010 return rc;
5011 }
5012 iNew = pPool->iFreeHead;
5013 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5014 }
5015
5016 /* unlink the free head */
5017 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5018 pPool->iFreeHead = pPage->iNext;
5019 pPage->iNext = NIL_PGMPOOL_IDX;
5020
5021 /*
5022 * Initialize it.
5023 */
5024 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5025 pPage->enmKind = enmKind;
5026 pPage->enmAccess = enmAccess;
5027 pPage->GCPhys = GCPhys;
5028 pPage->fA20Enabled = fA20Enabled;
5029 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5030 pPage->fMonitored = false;
5031 pPage->fCached = false;
5032 pPage->fDirty = false;
5033 pPage->fReusedFlushPending = false;
5034 pPage->cModifications = 0;
5035 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5036 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5037 pPage->cPresent = 0;
5038 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5039 pPage->idxDirtyEntry = 0;
5040 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5041 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5042 pPage->cLastAccessHandler = 0;
5043 pPage->cLocked = 0;
5044# ifdef VBOX_STRICT
5045 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5046# endif
5047
5048 /*
5049 * Insert into the tracking and cache. If this fails, free the page.
5050 */
5051 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5052 if (RT_FAILURE(rc3))
5053 {
5054 pPool->cUsedPages--;
5055 pPage->enmKind = PGMPOOLKIND_FREE;
5056 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5057 pPage->GCPhys = NIL_RTGCPHYS;
5058 pPage->iNext = pPool->iFreeHead;
5059 pPool->iFreeHead = pPage->idx;
5060 PGM_UNLOCK(pVM);
5061 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5062 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5063 return rc3;
5064 }
5065
5066 /*
5067 * Commit the allocation, clear the page and return.
5068 */
5069#ifdef VBOX_WITH_STATISTICS
5070 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5071 pPool->cUsedPagesHigh = pPool->cUsedPages;
5072#endif
5073
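    /* Only scrub the page when it isn't already known to be zeroed (fZeroed is set when it was cleared at flush time). */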
5074 if (!pPage->fZeroed)
5075 {
5076 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5077 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5078 ASMMemZeroPage(pv);
5079 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5080 }
5081
5082 *ppPage = pPage;
5083 if (fLockPage)
5084 pgmPoolLockPage(pPool, pPage);
5085 PGM_UNLOCK(pVM);
5086 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5087 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5088 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5089 return rc;
5090}
5091
5092
5093/**
5094 * Frees a usage of a pool page.
5095 *
5096 * @param pVM The cross context VM structure.
5097 * @param HCPhys The HC physical address of the shadow page.
5098 * @param iUser The shadow page pool index of the user table.
5099 * NIL_PGMPOOL_IDX if root page.
5100 * @param iUserTable The index into the user table (shadowed). Ignored if
5101 * root page.
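 *
 * @remarks Illustrative (hypothetical) call freeing the page table referenced by
 *          a shadow PDE; PdeDst, pShwPde and iPdDst are placeholder names:
 * @code
 *     pgmPoolFree(pVM, PdeDst.u & X86_PDE_PAE_PG_MASK, pShwPde->idx, iPdDst);
 * @endcode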
5102 */
5103void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5104{
5105 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5106 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5107 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5108}
5109
5110
5111/**
5112 * Internal worker for finding an 'in-use' shadow page given its physical address.
5113 *
5114 * @returns Pointer to the shadow page structure.
5115 * @param pPool The pool.
5116 * @param HCPhys The HC physical address of the shadow page.
5117 */
5118PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5119{
5120 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5121
5122 /*
5123 * Look up the page.
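     * (The page offset bits are masked off, so any address within the shadow page will do.)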
5124 */
5125 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5126
5127 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5128 return pPage;
5129}
5130
5131
5132/**
5133 * Internal worker for finding a page for debugging purposes, no assertions.
5134 *
5135 * @returns Pointer to the shadow page structure. NULL if not found.
5136 * @param pPool The pool.
5137 * @param HCPhys The HC physical address of the shadow page.
5138 */
5139PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5140{
5141 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5142 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5143}
5144
5145
5146/**
5147 * Internal worker for PGM_HCPHYS_2_PTR.
5148 *
5149 * @returns VBox status code.
5150 * @param pVM The cross context VM structure.
5151 * @param HCPhys The HC physical address of the shadow page.
5152 * @param ppv Where to return the address.
5153 */
5154int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5155{
5156 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5157 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5158 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5159 VERR_PGM_POOL_GET_PAGE_FAILED);
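    /* Return the current-context mapping of the shadow page plus the original page offset. */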
5160 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5161 return VINF_SUCCESS;
5162}
5163
5164#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5165
5166/**
5167 * Flush the specified page if present
5168 *
5169 * @param pVM The cross context VM structure.
5170 * @param GCPhys Guest physical address of the page to flush
5171 */
5172void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5173{
5174 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5175
5176 VM_ASSERT_EMT(pVM);
5177
5178 /*
5179 * Look up the GCPhys in the hash.
5180 */
5181 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5183 if (i == NIL_PGMPOOL_IDX)
5184 return;
5185
5186 do
5187 {
5188 PPGMPOOLPAGE pPage = &pPool->aPages[i];
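        /* The unsigned subtraction matches any shadow page whose GCPhys lies within this
           4 KB guest page (lower addresses wrap around and fail the test). */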
5189 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5190 {
5191 switch (pPage->enmKind)
5192 {
5193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5203 case PGMPOOLKIND_64BIT_PML4:
5204 case PGMPOOLKIND_32BIT_PD:
5205 case PGMPOOLKIND_PAE_PDPT:
5206 {
5207 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5208# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5209 if (pPage->fDirty)
5210 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5211 else
5212# endif
5213 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5214 Assert(!pgmPoolIsPageLocked(pPage));
5215 pgmPoolMonitorChainFlush(pPool, pPage);
5216 return;
5217 }
5218
5219 /* ignore, no monitoring. */
5220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5221 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5223 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5224 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5225 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5226 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5227 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5228 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5229 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5230 case PGMPOOLKIND_ROOT_NESTED:
5231 case PGMPOOLKIND_PAE_PD_PHYS:
5232 case PGMPOOLKIND_PAE_PDPT_PHYS:
5233 case PGMPOOLKIND_32BIT_PD_PHYS:
5234 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5235 break;
5236
5237 default:
5238 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5239 }
5240 }
5241
5242 /* next */
5243 i = pPage->iNext;
5244 } while (i != NIL_PGMPOOL_IDX);
5245 return;
5246}
5247
5248
5249/**
5250 * Resets a CPU on hot plugging.
5251 *
5252 * @param pVM The cross context VM structure.
5253 * @param pVCpu The cross context virtual CPU structure.
5254 */
5255void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5256{
5257 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5258
5259 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5260 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5261 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5262}
5263
5264
5265/**
5266 * Flushes the entire cache.
5267 *
5268 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5269 * this and will execute the CR3 flush.
5270 *
5271 * @param pVM The cross context VM structure.
5272 */
5273void pgmR3PoolReset(PVM pVM)
5274{
5275 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5276
5277 PGM_LOCK_ASSERT_OWNER(pVM);
5278 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5279 LogFlow(("pgmR3PoolReset:\n"));
5280
5281 /*
5282 * If there are no pages in the pool, there is nothing to do.
5283 */
5284 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5285 {
5286 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5287 return;
5288 }
5289
5290 /*
5291 * Exit the shadow mode since we're going to clear everything,
5292 * including the root page.
5293 */
5294 VMCC_FOR_EACH_VMCPU(pVM)
5295 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5296 VMCC_FOR_EACH_VMCPU_END(pVM);
5297
5298
5299 /*
5300 * Nuke the free list and reinsert all pages into it.
5301 */
5302 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5303 {
5304 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5305
5306 if (pPage->fMonitored)
5307 pgmPoolMonitorFlush(pPool, pPage);
5308 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5309 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5310 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5311 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5312 pPage->GCPhys = NIL_RTGCPHYS;
5313 pPage->enmKind = PGMPOOLKIND_FREE;
5314 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5315 Assert(pPage->idx == i);
5316 pPage->iNext = i + 1;
5317 pPage->fA20Enabled = true;
5318 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5319 pPage->fSeenNonGlobal = false;
5320 pPage->fMonitored = false;
5321 pPage->fDirty = false;
5322 pPage->fCached = false;
5323 pPage->fReusedFlushPending = false;
5324 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5325 pPage->cPresent = 0;
5326 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5327 pPage->cModifications = 0;
5328 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5329 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5330 pPage->idxDirtyEntry = 0;
5331 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5332 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5333 pPage->cLastAccessHandler = 0;
5334 pPage->cLocked = 0;
5335# ifdef VBOX_STRICT
5336 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5337# endif
5338 }
5339 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5340 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5341 pPool->cUsedPages = 0;
5342
5343 /*
5344 * Zap and reinitialize the user records.
5345 */
5346 pPool->cPresent = 0;
5347 pPool->iUserFreeHead = 0;
5348 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5349 const unsigned cMaxUsers = pPool->cMaxUsers;
5350 for (unsigned i = 0; i < cMaxUsers; i++)
5351 {
5352 paUsers[i].iNext = i + 1;
5353 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5354 paUsers[i].iUserTable = 0xfffffffe;
5355 }
5356 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5357
5358 /*
5359 * Clear all the GCPhys links and rebuild the phys ext free list.
5360 */
5361 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5362 pRam;
5363 pRam = pRam->CTX_SUFF(pNext))
5364 {
5365 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5366 while (iPage-- > 0)
5367 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5368 }
5369
5370 pPool->iPhysExtFreeHead = 0;
5371 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5372 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5373 for (unsigned i = 0; i < cMaxPhysExts; i++)
5374 {
5375 paPhysExts[i].iNext = i + 1;
5376 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5377 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5378 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5379 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5380 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5381 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5382 }
5383 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5384
5385 /*
5386 * Just zap the modified list.
5387 */
5388 pPool->cModifiedPages = 0;
5389 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5390
5391 /*
5392 * Clear the GCPhys hash and the age list.
5393 */
5394 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5395 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5396 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5397 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5398
5399# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5400 /* Clear all dirty pages. */
5401 pPool->idxFreeDirtyPage = 0;
5402 pPool->cDirtyPages = 0;
5403 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5404 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5405# endif
5406
5407 /*
5408 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5409 */
5410 VMCC_FOR_EACH_VMCPU(pVM)
5411 {
5412 /*
5413 * Re-enter the shadowing mode and assert Sync CR3 FF.
5414 */
5415 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5416 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5417 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5418 }
5419 VMCC_FOR_EACH_VMCPU_END(pVM);
5420
5421 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5422}
5423
5424#endif /* IN_RING3 */
5425
5426#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5427/**
5428 * Stringifies a PGMPOOLKIND value.
5429 */
5430static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5431{
5432 switch ((PGMPOOLKIND)enmKind)
5433 {
5434 case PGMPOOLKIND_INVALID:
5435 return "PGMPOOLKIND_INVALID";
5436 case PGMPOOLKIND_FREE:
5437 return "PGMPOOLKIND_FREE";
5438 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5439 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5441 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5442 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5443 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5444 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5445 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5446 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5447 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5448 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5449 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5450 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5451 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5452 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5453 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5454 case PGMPOOLKIND_32BIT_PD:
5455 return "PGMPOOLKIND_32BIT_PD";
5456 case PGMPOOLKIND_32BIT_PD_PHYS:
5457 return "PGMPOOLKIND_32BIT_PD_PHYS";
5458 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5459 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5460 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5461 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5462 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5463 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5464 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5465 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5466 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5467 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5468 case PGMPOOLKIND_PAE_PD_PHYS:
5469 return "PGMPOOLKIND_PAE_PD_PHYS";
5470 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5471 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5472 case PGMPOOLKIND_PAE_PDPT:
5473 return "PGMPOOLKIND_PAE_PDPT";
5474 case PGMPOOLKIND_PAE_PDPT_PHYS:
5475 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5476 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5477 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5478 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5479 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5480 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5481 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5482 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5483 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5484 case PGMPOOLKIND_64BIT_PML4:
5485 return "PGMPOOLKIND_64BIT_PML4";
5486 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5487 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5488 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5489 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5490 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5491 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5492 case PGMPOOLKIND_ROOT_NESTED:
5493 return "PGMPOOLKIND_ROOT_NESTED";
5494 }
5495 return "Unknown kind!";
5496}
5497#endif /* LOG_ENABLED || VBOX_STRICT */
5498