VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@93554

Last change on this file since 93554 was 93554, checked in by vboxsync, 3 years ago

VMM: Changed PAGE_SIZE -> GUEST_PAGE_SIZE / HOST_PAGE_SIZE, PAGE_SHIFT -> GUEST_PAGE_SHIFT / HOST_PAGE_SHIFT, and PAGE_OFFSET_MASK -> GUEST_PAGE_OFFSET_MASK / HOST_PAGE_OFFSET_MASK. Also removed most usage of ASMMemIsZeroPage and ASMMemZeroPage since the host and guest page size doesn't need to be the same any more. Some work left to do in the page pool code. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 208.1 KB
1/* $Id: PGMAllPool.cpp 93554 2022-02-02 22:57:02Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/em.h>
27#include <VBox/vmm/cpum.h>
28#include "PGMInternal.h"
29#include <VBox/vmm/vmcc.h>
30#include "PGMInline.h"
31#include <VBox/disopcode.h>
32#include <VBox/vmm/hm_vmx.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Internal Functions *
43*********************************************************************************************************************************/
44RT_C_DECLS_BEGIN
45#if 0 /* unused */
46DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
47DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
48#endif /* unused */
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68#if 0 /* unused */
69/**
70 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
71 *
72 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
73 * @param enmKind The page kind.
74 */
75DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
76{
77 switch (enmKind)
78 {
79 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
81 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
82 return true;
83 default:
84 return false;
85 }
86}
87#endif /* unused */
88
89
90/**
91 * Flushes a chain of pages sharing the same access monitor.
92 *
93 * @param pPool The pool.
94 * @param pPage A page in the chain.
95 */
96void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 for (;;)
118 {
119 idx = pPage->iMonitoredNext;
120 Assert(idx != pPage->idx);
121 if (pPage->idx >= PGMPOOL_IDX_FIRST)
122 {
123 int rc2 = pgmPoolFlushPage(pPool, pPage);
124 AssertRC(rc2);
125 }
126 /* next */
127 if (idx == NIL_PGMPOOL_IDX)
128 break;
129 pPage = &pPool->aPages[idx];
130 }
131}
132
133
134/**
135 * Wrapper for reading the guest entry that is being modified, using the current context mapping when available.
136 *
137 * @returns VBox status code suitable for scheduling.
138 * @param pVM The cross context VM structure.
139 * @param pvDst Destination address
140 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
141 * on the context (e.g. \#PF in R0 & RC).
142 * @param GCPhysSrc The source guest physical address.
143 * @param cb Size of data to read
144 */
145DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
146{
147#if defined(IN_RING3)
148 NOREF(pVM); NOREF(GCPhysSrc);
149 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
150 return VINF_SUCCESS;
151#else
152 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
153 NOREF(pvSrc);
154 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
155#endif
156}
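/*
 * Minimal usage sketch (illustrative, mirroring the monitor code further down):
 * the caller passes both the current-context mapping of the write (when
 * available) and its guest physical address, and the source is aligned down to
 * a cb-sized boundary, so the guest PTE overlapping a monitored write can be
 * fetched like this:
 *
 *     X86PTE GstPte;
 *     int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
 *     AssertRC(rc);
 */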
157
158
159/**
160 * Process shadow entries before they are changed by the guest.
161 *
162 * For PT entries we will clear them. For PD entries, we'll simply check
163 * for mapping conflicts and set the SyncCR3 FF if found.
164 *
165 * @param pVCpu The cross context virtual CPU structure.
166 * @param pPool The pool.
167 * @param pPage The head page.
168 * @param GCPhysFault The guest physical fault address.
169 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
170 * depending on the context (e.g. \#PF in R0 & RC).
171 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
172 */
173static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
174 void const *pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
178 PVMCC pVM = pPool->CTX_SUFF(pVM);
179 NOREF(pVCpu);
180
181 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
182 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
183
184 for (;;)
185 {
186 union
187 {
188 void *pv;
189 PX86PT pPT;
190 PPGMSHWPTPAE pPTPae;
191 PX86PD pPD;
192 PX86PDPAE pPDPae;
193 PX86PDPT pPDPT;
194 PX86PML4 pPML4;
195 } uShw;
196
197 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
198 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
199
200 uShw.pv = NULL;
201 switch (pPage->enmKind)
202 {
203 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
204 {
205 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
206 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
207 const unsigned iShw = off / sizeof(X86PTE);
208 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
209 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
210 if (uPde & X86_PTE_P)
211 {
212 X86PTE GstPte;
213 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
214 AssertRC(rc);
215 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
216 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
265 if (uPde & X86_PDE_P)
266 {
267 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
268 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
269 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
270 }
271
272 /* paranoia / a bit assumptive. */
273 if ( (off & 3)
274 && (off & 3) + cbWrite > 4)
275 {
276 const unsigned iShw2 = iShw + 2 + i;
277 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
278 {
279 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
280 if (uPde2 & X86_PDE_P)
281 {
282 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
283 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
284 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
285 }
286 }
287 }
288 }
289 }
290 break;
291 }
292
293 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
294 {
295 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
296 const unsigned iShw = off / sizeof(X86PTEPAE);
297 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
298 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
299 {
300 X86PTEPAE GstPte;
301 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
302 AssertRC(rc);
303
304 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
305 pgmPoolTracDerefGCPhysHint(pPool, pPage,
306 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
307 GstPte.u & X86_PTE_PAE_PG_MASK,
308 iShw);
309 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
310 }
311
312 /* paranoia / a bit assumptive. */
313 if ( (off & 7)
314 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
315 {
316 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
317 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
318
319 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
320 {
321 X86PTEPAE GstPte;
322 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
323 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
324 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
325 AssertRC(rc);
326 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
327 pgmPoolTracDerefGCPhysHint(pPool, pPage,
328 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
329 GstPte.u & X86_PTE_PAE_PG_MASK,
330 iShw2);
331 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
332 }
333 }
334 break;
335 }
336
337 case PGMPOOLKIND_32BIT_PD:
338 {
339 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
340 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
341
342 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
343 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
344 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
345 if (uPde & X86_PDE_P)
346 {
347 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
348 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
349 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
350 }
351
352 /* paranoia / a bit assumptive. */
353 if ( (off & 3)
354 && (off & 3) + cbWrite > sizeof(X86PTE))
355 {
356 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
357 if ( iShw2 != iShw
358 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
359 {
360 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
361 if (uPde2 & X86_PDE_P)
362 {
363 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
364 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
365 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
366 }
367 }
368 }
369#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
370 if ( uShw.pPD->a[iShw].n.u1Present
371 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
372 {
373 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
374 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
375 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
376 }
377#endif
378 break;
379 }
380
381 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
382 {
383 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
384 const unsigned iShw = off / sizeof(X86PDEPAE);
385 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
386
387 /*
388 * Causes trouble when the guest uses a PDE to refer to the whole page table level
389 * structure. (Invalidate here; faults later on when it tries to change the page
390 * table entries -> recheck; probably only applies to the RC case.)
391 */
392 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
393 if (uPde & X86_PDE_P)
394 {
395 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
396 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
397 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
398 }
399
400 /* paranoia / a bit assumptive. */
401 if ( (off & 7)
402 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
403 {
404 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
405 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
406
407 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
408 if (uPde2 & X86_PDE_P)
409 {
410 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
411 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
412 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
413 }
414 }
415 break;
416 }
417
418 case PGMPOOLKIND_PAE_PDPT:
419 {
420 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
421 /*
422 * Hopefully this doesn't happen very often:
423 * - touching unused parts of the page
424 * - messing with the bits of pd pointers without changing the physical address
425 */
426 /* PDPT roots are not page aligned; 32 byte only! */
427 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
428
429 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
430 const unsigned iShw = offPdpt / sizeof(X86PDPE);
431 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
432 {
433 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
434 if (uPdpe & X86_PDPE_P)
435 {
436 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
437 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
438 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
439 }
440
441 /* paranoia / a bit assumptive. */
442 if ( (offPdpt & 7)
443 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
444 {
445 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
446 if ( iShw2 != iShw
447 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
448 {
449 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
450 if (uPdpe2 & X86_PDPE_P)
451 {
452 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
453 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
454 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
455 }
456 }
457 }
458 }
459 break;
460 }
461
462 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
463 {
464 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
465 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
466 const unsigned iShw = off / sizeof(X86PDEPAE);
467 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
468 if (uPde & X86_PDE_P)
469 {
470 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
471 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
482 if (uPde2 & X86_PDE_P)
483 {
484 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
485 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
486 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
487 }
488 }
489 break;
490 }
491
492 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
493 {
494 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
495 /*
496 * Hopefully this doesn't happen very often:
497 * - messing with the bits of pd pointers without changing the physical address
498 */
499 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
500 const unsigned iShw = off / sizeof(X86PDPE);
501 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
502 if (uPdpe & X86_PDPE_P)
503 {
504 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
505 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
506 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
507 }
508 /* paranoia / a bit assumptive. */
509 if ( (off & 7)
510 && (off & 7) + cbWrite > sizeof(X86PDPE))
511 {
512 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
513 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
514 if (uPdpe2 & X86_PDPE_P)
515 {
516 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
517 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
518 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
519 }
520 }
521 break;
522 }
523
524 case PGMPOOLKIND_64BIT_PML4:
525 {
526 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
527 /*
528 * Hopefully this doesn't happen very often:
529 * - messing with the bits of pd pointers without changing the physical address
530 */
531 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
532 const unsigned iShw = off / sizeof(X86PDPE);
533 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
534 if (uPml4e & X86_PML4E_P)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
537 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
538 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
539 }
540 /* paranoia / a bit assumptive. */
541 if ( (off & 7)
542 && (off & 7) + cbWrite > sizeof(X86PDPE))
543 {
544 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
545 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
546 if (uPml4e2 & X86_PML4E_P)
547 {
548 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
549 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
550 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
551 }
552 }
553 break;
554 }
555
556 default:
557 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
558 }
559 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
560
561 /* next */
562 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
563 return;
564 pPage = &pPool->aPages[pPage->iMonitoredNext];
565 }
566}
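/*
 * Illustrative summary of the index math used above (no new logic): the shadow
 * entry touched by a write at offset 'off' into the monitored guest page is
 *
 *     iShw  = off / sizeof(X86PTE);       4-byte entries (32-bit PT/PD)
 *     iShw  = off / sizeof(X86PTEPAE);    8-byte entries (PAE / long mode)
 *
 * and when a misaligned write spills into the following entry, i.e.
 * (off & 7) && (off & 7) + cbWrite > sizeof(X86PDEPAE), the second entry is
 *
 *     iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
 */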
567
568#ifndef IN_RING3
569
570/**
571 * Checks if an access could be a fork operation in progress.
572 *
573 * Meaning that the guest is setting up the parent process for Copy-On-Write.
574 *
575 * @returns true if it's likely that we're forking, otherwise false.
576 * @param pPool The pool.
577 * @param pDis The disassembled instruction.
578 * @param offFault The access offset.
579 */
580DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
581{
582 /*
583 * i386 linux is using btr to clear X86_PTE_RW.
584 * The functions involved are (2.6.16 source inspection):
585 * clear_bit
586 * ptep_set_wrprotect
587 * copy_one_pte
588 * copy_pte_range
589 * copy_pmd_range
590 * copy_pud_range
591 * copy_page_range
592 * dup_mmap
593 * dup_mm
594 * copy_mm
595 * copy_process
596 * do_fork
597 */
598 if ( pDis->pCurInstr->uOpcode == OP_BTR
599 && !(offFault & 4)
600 /** @todo Validate that the bit index is X86_PTE_RW. */
601 )
602 {
603 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
604 return true;
605 }
606 return false;
607}
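/*
 * Illustration of the check above, assuming a 2.6-era i386 PAE Linux guest:
 * ptep_set_wrprotect() boils down to roughly
 *
 *     btr dword [ptep], 1      ; clear bit 1 (X86_PTE_RW) in the low dword
 *
 * The R/W bit sits in the low 32-bit half of a PAE PTE, so a BTR hitting the
 * high dword (offFault & 4) cannot be this pattern and is not taken as a fork.
 */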
608
609
610/**
611 * Determine whether the page is likely to have been reused.
612 *
613 * @returns true if we consider the page as being reused for a different purpose.
614 * @returns false if we consider it to still be a paging page.
615 * @param pVM The cross context VM structure.
616 * @param pVCpu The cross context virtual CPU structure.
617 * @param pRegFrame Trap register frame.
618 * @param pDis The disassembly info for the faulting instruction.
619 * @param pvFault The fault address.
620 * @param pPage The pool page being accessed.
621 *
622 * @remark The REP prefix check is left to the caller because of STOSD/W.
623 */
624DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
625 PPGMPOOLPAGE pPage)
626{
627 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
628 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
629 if (pPage->cLocked)
630 {
631 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused because it's locked!\n", pvFault, pPage));
632 return false;
633 }
634
635 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
636 if ( HMHasPendingIrq(pVM)
637 && pRegFrame->rsp - pvFault < 32)
638 {
639 /* Fault caused by stack writes while trying to inject an interrupt event. */
640 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
641 return true;
642 }
643
644 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
645
646 /* Non-supervisor mode write means it's used for something else. */
647 if (CPUMGetGuestCPL(pVCpu) == 3)
648 return true;
649
650 switch (pDis->pCurInstr->uOpcode)
651 {
652 /* call implies the actual push of the return address faulted */
653 case OP_CALL:
654 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
655 return true;
656 case OP_PUSH:
657 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
658 return true;
659 case OP_PUSHF:
660 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
661 return true;
662 case OP_PUSHA:
663 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
664 return true;
665 case OP_FXSAVE:
666 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
667 return true;
668 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
669 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
670 return true;
671 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
672 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
673 return true;
674 case OP_MOVSWD:
675 case OP_STOSWD:
676 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
677 && pRegFrame->rcx >= 0x40
678 )
679 {
680 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
681
682 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
683 return true;
684 }
685 break;
686
687 default:
688 /*
689 * Anything having ESP on the left side means stack writes.
690 */
691 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
692 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
693 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
694 {
695 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
696 return true;
697 }
698 break;
699 }
700
701 /*
702 * Page table updates are very unlikely to cross page boundaries,
703 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
704 */
705 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
706 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
707 {
708 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
709 return true;
710 }
711
712 /*
713 * Nobody does an unaligned 8-byte write to a page table, right?
714 */
715 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
716 {
717 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
718 return true;
719 }
720
721 return false;
722}
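/*
 * Worked example for the cross-page check above: an 8-byte write whose fault
 * address ends at page offset 0xffc gives
 *
 *     ((uintptr_t)pvFault + 8) >> X86_PAGE_SHIFT  !=  (uintptr_t)pvFault >> X86_PAGE_SHIFT
 *
 * (0xffc + 8 = 0x1004 crosses into the next 4 KB page), so the page is treated
 * as reused instead of being handed to pgmPoolMonitorChainChanging.
 */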
723
724
725/**
726 * Flushes the page being accessed.
727 *
728 * @returns VBox status code suitable for scheduling.
729 * @param pVM The cross context VM structure.
730 * @param pVCpu The cross context virtual CPU structure.
731 * @param pPool The pool.
732 * @param pPage The pool page (head).
733 * @param pDis The disassembly of the write instruction.
734 * @param pRegFrame The trap register frame.
735 * @param GCPhysFault The fault address as guest physical address.
736 * @param pvFault The fault address.
737 * @todo VBOXSTRICTRC
738 */
739static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
740 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
741{
742 NOREF(pVM); NOREF(GCPhysFault);
743
744 /*
745 * First, do the flushing.
746 */
747 pgmPoolMonitorChainFlush(pPool, pPage);
748
749 /*
750 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
751 * Must do this in raw mode (!); XP boot will fail otherwise.
752 */
753 int rc = VINF_SUCCESS;
754 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
755 if (rc2 == VINF_SUCCESS)
756 { /* do nothing */ }
757 else if (rc2 == VINF_EM_RESCHEDULE)
758 {
759 rc = VBOXSTRICTRC_VAL(rc2);
760# ifndef IN_RING3
761 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
762# endif
763 }
764 else if (rc2 == VERR_EM_INTERPRETER)
765 {
766 rc = VINF_EM_RAW_EMULATE_INSTR;
767 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
768 }
769 else if (RT_FAILURE_NP(rc2))
770 rc = VBOXSTRICTRC_VAL(rc2);
771 else
772 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
773
774 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
775 return rc;
776}
777
778
779/**
780 * Handles the STOSD write accesses.
781 *
782 * @returns VBox status code suitable for scheduling.
783 * @param pVM The cross context VM structure.
784 * @param pPool The pool.
785 * @param pPage The pool page (head).
786 * @param pDis The disassembly of the write instruction.
787 * @param pRegFrame The trap register frame.
788 * @param GCPhysFault The fault address as guest physical address.
789 * @param pvFault The fault address.
790 */
791DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
792 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
793{
794 unsigned uIncrement = pDis->Param1.cb;
795 NOREF(pVM);
796
797 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
798 Assert(pRegFrame->rcx <= 0x20);
799
800# ifdef VBOX_STRICT
801 if (pDis->uOpMode == DISCPUMODE_32BIT)
802 Assert(uIncrement == 4);
803 else
804 Assert(uIncrement == 8);
805# endif
806
807 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
808
809 /*
810 * Increment the modification counter and insert it into the list
811 * of modified pages the first time.
812 */
813 if (!pPage->cModifications++)
814 pgmPoolMonitorModifiedInsert(pPool, pPage);
815
816 /*
817 * Execute REP STOSD.
818 *
819 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
820 * write situation, meaning that it's safe to write here.
821 */
822 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
823 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
824 while (pRegFrame->rcx)
825 {
826 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
827 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
828 pu32 += uIncrement;
829 GCPhysFault += uIncrement;
830 pRegFrame->rdi += uIncrement;
831 pRegFrame->rcx--;
832 }
833 pRegFrame->rip += pDis->cbInstr;
834
835 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
836 return VINF_SUCCESS;
837}
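/*
 * One pass of the manual REP STOSD replay above, illustrated for the 32-bit
 * case (uIncrement == 4):
 *
 *     pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 4);  // zap the shadow entry first
 *     PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, 4);          // store EAX like the CPU would
 *     GCPhysFault += 4; pRegFrame->rdi += 4; pRegFrame->rcx--;                 // advance the operands
 *
 * After the loop RIP is advanced past the instruction, so the guest never
 * re-executes the monitored store.
 */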
838
839
840/**
841 * Handles the simple write accesses.
842 *
843 * @returns VBox status code suitable for scheduling.
844 * @param pVM The cross context VM structure.
845 * @param pVCpu The cross context virtual CPU structure.
846 * @param pPool The pool.
847 * @param pPage The pool page (head).
848 * @param pDis The disassembly of the write instruction.
849 * @param pRegFrame The trap register frame.
850 * @param GCPhysFault The fault address as guest physical address.
851 * @param pvFault The fault address.
852 * @param pfReused Reused state (in/out)
853 */
854DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
855 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
856{
857 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
858 NOREF(pVM);
859 NOREF(pfReused); /* initialized by caller */
860
861 /*
862 * Increment the modification counter and insert it into the list
863 * of modified pages the first time.
864 */
865 if (!pPage->cModifications++)
866 pgmPoolMonitorModifiedInsert(pPool, pPage);
867
868 /*
869 * Clear all the pages. ASSUMES that pvFault is readable.
870 */
871 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
872 if (cbWrite <= 8)
873 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
874 else if (cbWrite <= 16)
875 {
876 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
877 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
878 }
879 else
880 {
881 Assert(cbWrite <= 32);
882 for (uint32_t off = 0; off < cbWrite; off += 8)
883 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
884 }
885
886 /*
887 * Interpret the instruction.
888 */
889 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
890 if (RT_SUCCESS(rc))
891 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
892 else if (rc == VERR_EM_INTERPRETER)
893 {
894 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
895 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
896 rc = VINF_EM_RAW_EMULATE_INSTR;
897 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
898 }
899
900# if 0 /* experimental code */
901 if (rc == VINF_SUCCESS)
902 {
903 switch (pPage->enmKind)
904 {
905 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
906 {
907 X86PTEPAE GstPte;
908 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
909 AssertRC(rc);
910
911 /* Check the new value written by the guest. If present and with a bogus physical address, then
912 * it's fairly safe to assume the guest is reusing the PT.
913 */
914 if (GstPte.n.u1Present)
915 {
916 RTHCPHYS HCPhys = -1;
917 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
918 if (rc != VINF_SUCCESS)
919 {
920 *pfReused = true;
921 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
922 }
923 }
924 break;
925 }
926 }
927 }
928# endif
929
930 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
931 return VBOXSTRICTRC_VAL(rc);
932}
933
934
935/**
936 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
937 * \#PF access handler callback for page table pages.}
938 *
939 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
940 */
941DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
942 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
943{
944 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
946 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
947 unsigned cMaxModifications;
948 bool fForcedFlush = false;
949 NOREF(uErrorCode);
950
951 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
952
953 PGM_LOCK_VOID(pVM);
954 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
955 {
956 /* Pool page changed while we were waiting for the lock; ignore. */
957 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
958 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
959 PGM_UNLOCK(pVM);
960 return VINF_SUCCESS;
961 }
962# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
963 if (pPage->fDirty)
964 {
965 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
966 PGM_UNLOCK(pVM);
967 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
968 }
969# endif
970
971# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
972 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
973 {
974 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
975 void *pvGst;
976 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
977 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
978 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
979 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
980 }
981# endif
982
983 /*
984 * Disassemble the faulting instruction.
985 */
986 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
987 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
988 if (RT_UNLIKELY(rc != VINF_SUCCESS))
989 {
990 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
991 PGM_UNLOCK(pVM);
992 return rc;
993 }
994
995 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
996
997 /*
998 * We should ALWAYS have the list head as user parameter. This
999 * is because we use that page to record the changes.
1000 */
1001 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1002
1003# ifdef IN_RING0
1004 /* Maximum nr of modifications depends on the page type. */
1005 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1006 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1007 cMaxModifications = 4;
1008 else
1009 cMaxModifications = 24;
1010# else
1011 cMaxModifications = 48;
1012# endif
1013
1014 /*
1015 * Incremental page table updates should weigh more than random ones.
1016 * (Only applies when started from offset 0)
1017 */
1018 pVCpu->pgm.s.cPoolAccessHandler++;
1019 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1020 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1021 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1022 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1023 {
1024 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1025 Assert(pPage->cModifications < 32000);
1026 pPage->cModifications = pPage->cModifications * 2;
1027 pPage->GCPtrLastAccessHandlerFault = pvFault;
1028 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1029 if (pPage->cModifications >= cMaxModifications)
1030 {
1031 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1032 fForcedFlush = true;
1033 }
1034 }
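/*
 * Sketch of the heuristic above: when the faulting RIP is within +/-0x40 of
 * the previous monitored fault, the faulting address is exactly one operand
 * past the previous one, and no other pool access happened in between, the
 * writes look like a sequential page-table initialization. cModifications is
 * then doubled so the flush threshold (cMaxModifications) is reached quickly
 * instead of taking a long run of individual write faults.
 */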
1035
1036 if (pPage->cModifications >= cMaxModifications)
1037 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1038
1039 /*
1040 * Check if it's worth dealing with.
1041 */
1042 bool fReused = false;
1043 bool fNotReusedNotForking = false;
1044 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1045 || pgmPoolIsPageLocked(pPage)
1046 )
1047 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1048 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1049 {
1050 /*
1051 * Simple instructions, no REP prefix.
1052 */
1053 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1054 {
1055 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1056 if (fReused)
1057 goto flushPage;
1058
1059 /* A mov instruction to change the first page table entry will be remembered so we can detect
1060 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1061 */
1062 if ( rc == VINF_SUCCESS
1063 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1064 && pDis->pCurInstr->uOpcode == OP_MOV
1065 && (pvFault & PAGE_OFFSET_MASK) == 0)
1066 {
1067 pPage->GCPtrLastAccessHandlerFault = pvFault;
1068 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1069 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1070 /* Make sure we don't kick out a page too quickly. */
1071 if (pPage->cModifications > 8)
1072 pPage->cModifications = 2;
1073 }
1074 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1075 {
1076 /* ignore the 2nd write to this page table entry. */
1077 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1078 }
1079 else
1080 {
1081 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1082 pPage->GCPtrLastAccessHandlerRip = 0;
1083 }
1084
1085 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1086 PGM_UNLOCK(pVM);
1087 return rc;
1088 }
1089
1090 /*
1091 * Windows is frequently doing small memset() operations (netio test 4k+).
1092 * We have to deal with these or we'll kill the cache and performance.
1093 */
1094 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1095 && !pRegFrame->eflags.Bits.u1DF
1096 && pDis->uOpMode == pDis->uCpuMode
1097 && pDis->uAddrMode == pDis->uCpuMode)
1098 {
1099 bool fValidStosd = false;
1100
1101 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1102 && pDis->fPrefix == DISPREFIX_REP
1103 && pRegFrame->ecx <= 0x20
1104 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1105 && !((uintptr_t)pvFault & 3)
1106 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1107 )
1108 {
1109 fValidStosd = true;
1110 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1111 }
1112 else
1113 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1114 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1115 && pRegFrame->rcx <= 0x20
1116 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1117 && !((uintptr_t)pvFault & 7)
1118 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1119 )
1120 {
1121 fValidStosd = true;
1122 }
1123
1124 if (fValidStosd)
1125 {
1126 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1127 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1128 PGM_UNLOCK(pVM);
1129 return rc;
1130 }
1131 }
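/*
 * Example of a write that takes the 32-bit fValidStosd path above
 * (illustrative values only): "rep stosd" with ECX = 0x10, EDI 4-byte aligned,
 * EAX = 0 and DF clear, zeroing 64 bytes that stay inside the guest page.
 * Larger counts, other fill values or stores crossing the page fall through
 * to the REP-prefix bail-out below.
 */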
1132
1133 /* REP prefix, don't bother. */
1134 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1135 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1136 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1137 fNotReusedNotForking = true;
1138 }
1139
1140# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1141 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1142 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1143 */
1144 if ( pPage->cModifications >= cMaxModifications
1145 && !fForcedFlush
1146 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1147 && ( fNotReusedNotForking
1148 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1149 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1150 )
1151 )
1152 {
1153 Assert(!pgmPoolIsPageLocked(pPage));
1154 Assert(pPage->fDirty == false);
1155
1156 /* Flush any monitored duplicates as we will disable write protection. */
1157 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1158 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1159 {
1160 PPGMPOOLPAGE pPageHead = pPage;
1161
1162 /* Find the monitor head. */
1163 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1164 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1165
1166 while (pPageHead)
1167 {
1168 unsigned idxNext = pPageHead->iMonitoredNext;
1169
1170 if (pPageHead != pPage)
1171 {
1172 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1173 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1174 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1175 AssertRC(rc2);
1176 }
1177
1178 if (idxNext == NIL_PGMPOOL_IDX)
1179 break;
1180
1181 pPageHead = &pPool->aPages[idxNext];
1182 }
1183 }
1184
1185 /* The flushing above might fail for locked pages, so double check. */
1186 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1187 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1188 {
1189 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1190
1191 /* Temporarily allow write access to the page table again. */
1192 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1193 if (rc == VINF_SUCCESS)
1194 {
1195 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1196 AssertMsg(rc == VINF_SUCCESS
1197 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1198 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1199 || rc == VERR_PAGE_NOT_PRESENT,
1200 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1201# ifdef VBOX_STRICT
1202 pPage->GCPtrDirtyFault = pvFault;
1203# endif
1204
1205 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1206 PGM_UNLOCK(pVM);
1207 return rc;
1208 }
1209 }
1210 }
1211# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1212
1213 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1214flushPage:
1215 /*
1216 * Not worth it, so flush it.
1217 *
1218 * If we considered it to be reused, don't go back to ring-3
1219 * to emulate failed instructions since we usually cannot
1220 * interpret them. This may be a bit risky, in which case
1221 * the reuse detection must be fixed.
1222 */
1223 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1224 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1225 && fReused)
1226 {
1227 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1228 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1229 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1230 }
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1232 PGM_UNLOCK(pVM);
1233 return rc;
1234}
1235
1236#endif /* !IN_RING3 */
1237
1238/**
1239 * @callback_method_impl{FNPGMPHYSHANDLER,
1240 * Access handler for shadowed page table pages.}
1241 *
1242 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1243 */
1244PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1245pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1246 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1247{
1248 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1249 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1250 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1251 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1252 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1253
1254 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1255
1256 PGM_LOCK_VOID(pVM);
1257
1258#ifdef VBOX_WITH_STATISTICS
1259 /*
1260 * Collect stats on the access.
1261 */
1262 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1263 if (cbBuf <= 16 && cbBuf > 0)
1264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1265 else if (cbBuf >= 17 && cbBuf < 32)
1266 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1267 else if (cbBuf >= 32 && cbBuf < 64)
1268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1269 else if (cbBuf >= 64)
1270 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1271
1272 uint8_t cbAlign;
1273 switch (pPage->enmKind)
1274 {
1275 default:
1276 cbAlign = 7;
1277 break;
1278 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1279 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1280 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1281 case PGMPOOLKIND_32BIT_PD:
1282 case PGMPOOLKIND_32BIT_PD_PHYS:
1283 cbAlign = 3;
1284 break;
1285 }
1286 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1287 if ((uint8_t)GCPhys & cbAlign)
1288 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1289#endif
1290
1291 /*
1292 * Make sure the pool page wasn't modified by a different CPU.
1293 */
1294 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1295 {
1296 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1297
1298 /* The max modification count before flushing depends on the context and page type. */
1299#ifdef IN_RING3
1300 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1301#else
1302 uint16_t cMaxModifications;
1303 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1304 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1305 cMaxModifications = 4;
1306 else
1307 cMaxModifications = 24;
1308#endif
1309
1310 /*
1311 * We don't have to be very sophisticated about this since there are relatively few calls here.
1312 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1313 */
1314 if ( ( pPage->cModifications < cMaxModifications
1315 || pgmPoolIsPageLocked(pPage) )
1316 && enmOrigin != PGMACCESSORIGIN_DEVICE
1317 && cbBuf <= 16)
1318 {
1319 /* Clear the shadow entry. */
1320 if (!pPage->cModifications++)
1321 pgmPoolMonitorModifiedInsert(pPool, pPage);
1322
1323 if (cbBuf <= 8)
1324 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1325 else
1326 {
1327 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1328 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1329 }
1330 }
1331 else
1332 pgmPoolMonitorChainFlush(pPool, pPage);
1333
1334 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1335 }
1336 else
1337 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1338 PGM_UNLOCK(pVM);
1339 return VINF_PGM_HANDLER_DO_DEFAULT;
1340}
1341
1342
1343#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1344
1345# if defined(VBOX_STRICT) && !defined(IN_RING3)
1346
1347/**
1348 * Check references to guest physical memory in a PAE / PAE page table.
1349 *
1350 * @param pPool The pool.
1351 * @param pPage The page.
1352 * @param pShwPT The shadow page table (mapping of the page).
1353 * @param pGstPT The guest page table.
1354 */
1355static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1356{
1357 unsigned cErrors = 0;
1358 int LastRc = -1; /* initialized to shut up gcc */
1359 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1360 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1361 PVMCC pVM = pPool->CTX_SUFF(pVM);
1362
1363# ifdef VBOX_STRICT
1364 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1365 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1366# endif
1367 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1368 {
1369 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1370 {
1371 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1372 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1373 if ( rc != VINF_SUCCESS
1374 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1375 {
1376 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1377 LastPTE = i;
1378 LastRc = rc;
1379 LastHCPhys = HCPhys;
1380 cErrors++;
1381
1382 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1383 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1384 AssertRC(rc);
1385
1386 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1387 {
1388 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1389
1390 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1391 {
1392 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1393
1394 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1395 {
1396 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1397 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1398 {
1399 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1400 }
1401 }
1402
1403 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1404 }
1405 }
1406 }
1407 }
1408 }
1409 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1410}
1411
1412
1413/**
1414 * Check references to guest physical memory in a PAE / 32-bit page table.
1415 *
1416 * @param pPool The pool.
1417 * @param pPage The page.
1418 * @param pShwPT The shadow page table (mapping of the page).
1419 * @param pGstPT The guest page table.
1420 */
1421static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1422{
1423 unsigned cErrors = 0;
1424 int LastRc = -1; /* initialized to shut up gcc */
1425 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1426 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1427 PVMCC pVM = pPool->CTX_SUFF(pVM);
1428
1429# ifdef VBOX_STRICT
1430 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1431 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1432# endif
1433 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1434 {
1435 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1436 {
1437 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1438 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1439 if ( rc != VINF_SUCCESS
1440 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1441 {
1442 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1443 LastPTE = i;
1444 LastRc = rc;
1445 LastHCPhys = HCPhys;
1446 cErrors++;
1447
1448 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1449 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1450 AssertRC(rc);
1451
1452 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1453 {
1454 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1455
1456 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1457 {
1458 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1459
1460 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1461 {
1462 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1463 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1464 {
1465 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1466 }
1467 }
1468
1469 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1470 }
1471 }
1472 }
1473 }
1474 }
1475 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1476}
1477
1478# endif /* VBOX_STRICT && !IN_RING3 */
1479
1480/**
1481 * Clear references to guest physical memory in a PAE / PAE page table.
1482 *
1483 * @returns nr of changed PTEs
1484 * @param pPool The pool.
1485 * @param pPage The page.
1486 * @param pShwPT The shadow page table (mapping of the page).
1487 * @param pGstPT The guest page table.
1488 * @param pOldGstPT The old cached guest page table.
1489 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1490 * @param pfFlush Flush reused page table (out)
1491 */
1492DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1493 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1494{
1495 unsigned cChanged = 0;
1496
1497# ifdef VBOX_STRICT
1498 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1499 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1500# endif
1501 *pfFlush = false;
1502
1503 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1504 {
1505 /* Check the new value written by the guest. If present and with a bogus physical address, then
1506 * it's fairly safe to assume the guest is reusing the PT.
1507 */
1508 if ( fAllowRemoval
1509 && (pGstPT->a[i].u & X86_PTE_P))
1510 {
1511 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1512 {
1513 *pfFlush = true;
1514 return ++cChanged;
1515 }
1516 }
1517 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1518 {
1519 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1520 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1521 {
1522# ifdef VBOX_STRICT
1523 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1524 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1525 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1526# endif
1527 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1528 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1529 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1530 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1531
1532 if ( uHostAttr == uGuestAttr
1533 && fHostRW <= fGuestRW)
1534 continue;
1535 }
1536 cChanged++;
1537 /* Something was changed, so flush it. */
1538 Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1539 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1540 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1541 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1542 }
1543 }
1544 return cChanged;
1545}
1546
1547
1548/**
1549 * Clear references to guest physical memory in a PAE / 32-bit page table.
1550 *
1551 * @returns Number of changed PTEs.
1552 * @param pPool The pool.
1553 * @param pPage The page.
1554 * @param pShwPT The shadow page table (mapping of the page).
1555 * @param pGstPT The guest page table.
1556 * @param pOldGstPT The old cached guest page table.
1557 * @param fAllowRemoval Whether to bail out as soon as we encounter an invalid PTE.
1558 * @param pfFlush Where to return whether the reused page table must be flushed (out).
1559 */
1560DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1561 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1562{
1563 unsigned cChanged = 0;
1564
1565# ifdef VBOX_STRICT
1566 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1567 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1568# endif
1569 *pfFlush = false;
1570
1571 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1572 {
1573 /* Check the new value written by the guest. If present and with a bogus physical address, then
1574 * it's fairly safe to assume the guest is reusing the PT. */
1575 if (fAllowRemoval)
1576 {
1577 X86PGUINT const uPte = pGstPT->a[i].u;
1578 if ( (uPte & X86_PTE_P)
1579 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1580 {
1581 *pfFlush = true;
1582 return ++cChanged;
1583 }
1584 }
1585 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1586 {
1587 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1588 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1589 {
1590# ifdef VBOX_STRICT
1591 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1592 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1593 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1594# endif
1595 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1596 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1597 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1598 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1599
1600 if ( uHostAttr == uGuestAttr
1601 && fHostRW <= fGuestRW)
1602 continue;
1603 }
1604 cChanged++;
1605 /* Something was changed, so flush it. */
1606 Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1607 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1608 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1609 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1610 }
1611 }
1612 return cChanged;
1613}
1614
1615
1616/**
1617 * Flush a dirty page
1618 *
1619 * @param pVM The cross context VM structure.
1620 * @param pPool The pool.
1621 * @param idxSlot Dirty array slot index
1622 * @param fAllowRemoval Allow a reused page table to be removed
1623 */
1624static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1625{
1626 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1627
1628 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1629 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1630 if (idxPage == NIL_PGMPOOL_IDX)
1631 return;
1632
1633 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1634 Assert(pPage->idx == idxPage);
1635 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1636
1637 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1638 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1639
1640 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1641 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1642 Assert(rc == VINF_SUCCESS);
1643 pPage->fDirty = false;
1644
1645# ifdef VBOX_STRICT
1646 uint64_t fFlags = 0;
1647 RTHCPHYS HCPhys;
1648 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1649 AssertMsg( ( rc == VINF_SUCCESS
1650 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1651 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1652 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1653 || rc == VERR_PAGE_NOT_PRESENT,
1654 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1655# endif
1656
1657 /* Flush those PTEs that have changed. */
1658 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1659 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1660 void *pvGst;
1661 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1662 bool fFlush;
1663 unsigned cChanges;
1664
1665 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1666 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1667 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1668 else
1669 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1670 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1671
1672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1674 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1675 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1676
1677 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1678 Assert(pPage->cModifications);
1679 if (cChanges < 4)
1680 pPage->cModifications = 1; /* must use > 0 here */
1681 else
1682 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1683
1684 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
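/* If the dirty page array was full, the slot we just emptied becomes the next free one. */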
1685 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1686 pPool->idxFreeDirtyPage = idxSlot;
1687
1688 pPool->cDirtyPages--;
1689 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1690 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1691 if (fFlush)
1692 {
1693 Assert(fAllowRemoval);
1694 Log(("Flush reused page table!\n"));
1695 pgmPoolFlushPage(pPool, pPage);
1696 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1697 }
1698 else
1699 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1700}
1701
1702
1703# ifndef IN_RING3
1704/**
1705 * Add a new dirty page
1706 *
1707 * @param pVM The cross context VM structure.
1708 * @param pPool The pool.
1709 * @param pPage The page.
1710 */
1711void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1712{
1713 PGM_LOCK_ASSERT_OWNER(pVM);
1714 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1715 Assert(!pPage->fDirty);
1716
1717 unsigned idxFree = pPool->idxFreeDirtyPage;
1718 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1719 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1720
1721 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1722 {
1723 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1724 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1725 }
1726 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1727 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1728
1729 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1730
1731 /*
1732 * Make a copy of the guest page table as we require valid GCPhys addresses
1733 * when removing references to physical pages.
1734 * (The HCPhys linear lookup is *extremely* expensive!)
1735 */
1736 void *pvGst;
1737 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1738 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1739 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
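/* Note: a PAE shadow PT covers a whole 4 KB PAE guest PT, but only half of a 32-bit guest
   PT (512 of its 1024 entries), so copying 2 KB suffices in the latter case. */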
1740# ifdef VBOX_STRICT
1741 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1742 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1743 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1744 else
1745 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1746 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1747# endif
1748 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1749
1750 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1751 pPage->fDirty = true;
1752 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1753 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1754 pPool->cDirtyPages++;
1755
1756 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
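/* The advanced free index may still point at an occupied slot; as long as the array is not
   full, scan for a genuinely free one. */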
1757 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1758 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1759 {
1760 unsigned i;
1761 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1762 {
1763 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1764 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1765 {
1766 pPool->idxFreeDirtyPage = idxFree;
1767 break;
1768 }
1769 }
1770 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1771 }
1772
1773 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1774
1775 /*
1776 * Clear all references to this shadow table. See @bugref{7298}.
1777 */
1778 pgmPoolTrackClearPageUsers(pPool, pPage);
1779}
1780# endif /* !IN_RING3 */
1781
1782
1783/**
1784 * Check if the specified page is dirty (not write monitored)
1785 *
1786 * @returns true if the page is dirty, false if not.
1787 * @param pVM The cross context VM structure.
1788 * @param GCPhys Guest physical address
1789 */
1790bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1791{
1792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1793 PGM_LOCK_ASSERT_OWNER(pVM);
1794 if (!pPool->cDirtyPages)
1795 return false;
1796
1797 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1798
1799 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1800 {
1801 unsigned idxPage = pPool->aidxDirtyPages[i];
1802 if (idxPage != NIL_PGMPOOL_IDX)
1803 {
1804 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1805 if (pPage->GCPhys == GCPhys)
1806 return true;
1807 }
1808 }
1809 return false;
1810}
1811
1812
1813/**
1814 * Reset all dirty pages by reinstating page monitoring.
1815 *
1816 * @param pVM The cross context VM structure.
1817 */
1818void pgmPoolResetDirtyPages(PVMCC pVM)
1819{
1820 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1821 PGM_LOCK_ASSERT_OWNER(pVM);
1822 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1823
1824 if (!pPool->cDirtyPages)
1825 return;
1826
1827 Log(("pgmPoolResetDirtyPages\n"));
1828 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1829 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1830
1831 pPool->idxFreeDirtyPage = 0;
1832 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1833 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1834 {
1835 unsigned i;
1836 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1837 {
1838 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1839 {
1840 pPool->idxFreeDirtyPage = i;
1841 break;
1842 }
1843 }
1844 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1845 }
1846
1847 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1848 return;
1849}
1850
1851
1852/**
1853 * Invalidate the PT entry for the specified page
1854 *
1855 * @param pVM The cross context VM structure.
1856 * @param GCPtrPage Guest page to invalidate
1857 */
1858void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1859{
1860 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1861 PGM_LOCK_ASSERT_OWNER(pVM);
1862 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1863
1864 if (!pPool->cDirtyPages)
1865 return;
1866
1867 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1868 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1869 {
1870 /** @todo What was intended here??? This looks incomplete... */
1871 }
1872}
1873
1874
1875/**
1876 * Flushes the dirty state of the specified page table, reinstating its write monitoring.
1877 *
1878 * @param pVM The cross context VM structure.
1879 * @param GCPhysPT Physical address of the page table
1880 */
1881void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1882{
1883 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1884 PGM_LOCK_ASSERT_OWNER(pVM);
1885 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1886 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1887
1888 if (!pPool->cDirtyPages)
1889 return;
1890
1891 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1892
1893 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1894 {
1895 unsigned idxPage = pPool->aidxDirtyPages[i];
1896 if (idxPage != NIL_PGMPOOL_IDX)
1897 {
1898 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1899 if (pPage->GCPhys == GCPhysPT)
1900 {
1901 idxDirtyPage = i;
1902 break;
1903 }
1904 }
1905 }
1906
1907 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1908 {
1909 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1910 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1911 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1912 {
1913 unsigned i;
1914 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1915 {
1916 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1917 {
1918 pPool->idxFreeDirtyPage = i;
1919 break;
1920 }
1921 }
1922 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1923 }
1924 }
1925}
1926
1927#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1928
1929/**
1930 * Inserts a page into the GCPhys hash table.
1931 *
1932 * @param pPool The pool.
1933 * @param pPage The page.
1934 */
1935DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1936{
1937 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1938 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
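/* Prepend the page to its hash bucket; pages in a bucket are singly linked through their iNext indices. */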
1939 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1940 pPage->iNext = pPool->aiHash[iHash];
1941 pPool->aiHash[iHash] = pPage->idx;
1942}
1943
1944
1945/**
1946 * Removes a page from the GCPhys hash table.
1947 *
1948 * @param pPool The pool.
1949 * @param pPage The page.
1950 */
1951DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1952{
1953 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1954 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1955 if (pPool->aiHash[iHash] == pPage->idx)
1956 pPool->aiHash[iHash] = pPage->iNext;
1957 else
1958 {
1959 uint16_t iPrev = pPool->aiHash[iHash];
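/* The page is not the bucket head; walk the chain to find its predecessor and unlink it. */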
1960 for (;;)
1961 {
1962 const int16_t i = pPool->aPages[iPrev].iNext;
1963 if (i == pPage->idx)
1964 {
1965 pPool->aPages[iPrev].iNext = pPage->iNext;
1966 break;
1967 }
1968 if (i == NIL_PGMPOOL_IDX)
1969 {
1970 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1971 break;
1972 }
1973 iPrev = i;
1974 }
1975 }
1976 pPage->iNext = NIL_PGMPOOL_IDX;
1977}
1978
1979
1980/**
1981 * Frees up one cache page.
1982 *
1983 * @returns VBox status code.
1984 * @retval VINF_SUCCESS on success.
1985 * @param pPool The pool.
1986 * @param iUser The user index.
1987 */
1988static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1989{
1990 const PVMCC pVM = pPool->CTX_SUFF(pVM);
1991 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1992 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1993
1994 /*
1995 * Select one page from the tail of the age list.
1996 */
1997 PPGMPOOLPAGE pPage;
1998 for (unsigned iLoop = 0; ; iLoop++)
1999 {
2000 uint16_t iToFree = pPool->iAgeTail;
2001 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2002 iToFree = pPool->aPages[iToFree].iAgePrev;
2003/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2004 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2005 {
2006 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2007 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2008 {
2009 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2010 continue;
2011 iToFree = i;
2012 break;
2013 }
2014 }
2015*/
2016 Assert(iToFree != iUser);
2017 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2018 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2019 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2020 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2021 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2022 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2023
2024 pPage = &pPool->aPages[iToFree];
2025
2026 /*
2027 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2028 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2029 */
2030 if ( !pgmPoolIsPageLocked(pPage)
2031 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2032 break;
2033 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2034 pgmPoolCacheUsed(pPool, pPage);
2035 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2036 }
2037
2038 /*
2039 * Found a usable page, flush it and return.
2040 */
2041 int rc = pgmPoolFlushPage(pPool, pPage);
2042 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2043 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2044 if (rc == VINF_SUCCESS)
2045 PGM_INVL_ALL_VCPU_TLBS(pVM);
2046 return rc;
2047}
2048
2049
2050/**
2051 * Checks if a kind mismatch is really a page being reused
2052 * or if it's just normal remappings.
2053 *
2054 * @returns true if reused and the cached page (enmKind1) should be flushed
2055 * @returns false if not reused.
2056 * @param enmKind1 The kind of the cached page.
2057 * @param enmKind2 The kind of the requested page.
2058 */
2059static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2060{
2061 switch (enmKind1)
2062 {
2063 /*
2064 * Never reuse them. There is no remapping in non-paging mode.
2065 */
2066 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2067 case PGMPOOLKIND_32BIT_PD_PHYS:
2068 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2069 case PGMPOOLKIND_PAE_PD_PHYS:
2070 case PGMPOOLKIND_PAE_PDPT_PHYS:
2071 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2072 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2073 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2074 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2075 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2076 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2077 return false;
2078
2079 /*
2080 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2081 */
2082 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2083 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2084 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2085 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2086 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2087 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2088 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2089 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2090 case PGMPOOLKIND_32BIT_PD:
2091 case PGMPOOLKIND_PAE_PDPT:
2092 switch (enmKind2)
2093 {
2094 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2095 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2096 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2097 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2098 case PGMPOOLKIND_64BIT_PML4:
2099 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2100 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2101 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2102 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2103 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2104 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2105 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2106 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2107 return true;
2108 default:
2109 return false;
2110 }
2111
2112 /*
2113 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2114 */
2115 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2116 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2117 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2118 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2119 case PGMPOOLKIND_64BIT_PML4:
2120 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2121 switch (enmKind2)
2122 {
2123 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2124 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2125 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2126 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2127 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2128 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2129 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2130 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2131 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2132 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2133 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2134 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2135 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2136 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2137 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2138 return true;
2139 default:
2140 return false;
2141 }
2142
2143 /*
2144 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2145 */
2146 case PGMPOOLKIND_ROOT_NESTED:
2147 return false;
2148
2149 default:
2150 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2151 }
2152}
2153
2154
2155/**
2156 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2157 *
2158 * @returns VBox status code.
2159 * @retval VINF_PGM_CACHED_PAGE on success.
2160 * @retval VERR_FILE_NOT_FOUND if not found.
2161 * @param pPool The pool.
2162 * @param GCPhys The GC physical address of the page we're gonna shadow.
2163 * @param enmKind The kind of mapping.
2164 * @param enmAccess Access type for the mapping (only relevant for big pages)
2165 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2166 * @param iUser The shadow page pool index of the user table. This is
2167 * NIL_PGMPOOL_IDX for root pages.
2168 * @param iUserTable The index into the user table (shadowed). Ignored if
2169 * root page
2170 * @param ppPage Where to store the pointer to the page.
2171 */
2172static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2173 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2174{
2175 /*
2176 * Look up the GCPhys in the hash.
2177 */
2178 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2179 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
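/* Walk the hash chain: several pages may share a bucket, and the same GCPhys may even be
   shadowed by pages of different kinds (see the kind mismatch handling below). */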
2180 if (i != NIL_PGMPOOL_IDX)
2181 {
2182 do
2183 {
2184 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2185 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2186 if (pPage->GCPhys == GCPhys)
2187 {
2188 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2189 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2190 && pPage->fA20Enabled == fA20Enabled)
2191 {
2192 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2193 * doesn't flush it in case there are no more free use records.
2194 */
2195 pgmPoolCacheUsed(pPool, pPage);
2196
2197 int rc = VINF_SUCCESS;
2198 if (iUser != NIL_PGMPOOL_IDX)
2199 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2200 if (RT_SUCCESS(rc))
2201 {
2202 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2203 *ppPage = pPage;
2204 if (pPage->cModifications)
2205 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2206 STAM_COUNTER_INC(&pPool->StatCacheHits);
2207 return VINF_PGM_CACHED_PAGE;
2208 }
2209 return rc;
2210 }
2211
2212 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2213 {
2214 /*
2215 * The kind is different. In some cases we should now flush the page
2216 * as it has been reused, but in most cases this is normal remapping
2217 * of PDs as PT or big pages using the GCPhys field in a slightly
2218 * different way than the other kinds.
2219 */
2220 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2221 {
2222 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2223 pgmPoolFlushPage(pPool, pPage);
2224 break;
2225 }
2226 }
2227 }
2228
2229 /* next */
2230 i = pPage->iNext;
2231 } while (i != NIL_PGMPOOL_IDX);
2232 }
2233
2234 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2235 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2236 return VERR_FILE_NOT_FOUND;
2237}
2238
2239
2240/**
2241 * Inserts a page into the cache.
2242 *
2243 * @param pPool The pool.
2244 * @param pPage The cached page.
2245 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2246 */
2247static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2248{
2249 /*
2250 * Insert into the GCPhys hash if the page is fit for that.
2251 */
2252 Assert(!pPage->fCached);
2253 if (fCanBeCached)
2254 {
2255 pPage->fCached = true;
2256 pgmPoolHashInsert(pPool, pPage);
2257 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2258 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2259 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2260 }
2261 else
2262 {
2263 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2264 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2265 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2266 }
2267
2268 /*
2269 * Insert at the head of the age list.
2270 */
2271 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2272 pPage->iAgeNext = pPool->iAgeHead;
2273 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2274 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2275 else
2276 pPool->iAgeTail = pPage->idx;
2277 pPool->iAgeHead = pPage->idx;
2278}
2279
2280
2281/**
2282 * Flushes a cached page.
2283 *
2284 * @param pPool The pool.
2285 * @param pPage The cached page.
2286 */
2287static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2288{
2289 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2290
2291 /*
2292 * Remove the page from the hash.
2293 */
2294 if (pPage->fCached)
2295 {
2296 pPage->fCached = false;
2297 pgmPoolHashRemove(pPool, pPage);
2298 }
2299 else
2300 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2301
2302 /*
2303 * Remove it from the age list.
2304 */
2305 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2306 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2307 else
2308 pPool->iAgeTail = pPage->iAgePrev;
2309 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2310 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2311 else
2312 pPool->iAgeHead = pPage->iAgeNext;
2313 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2314 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2315}
2316
2317
2318/**
2319 * Looks for pages sharing the monitor.
2320 *
2321 * @returns Pointer to the head page.
2322 * @returns NULL if not found.
2323 * @param pPool The Pool
2324 * @param pNewPage The page which is going to be monitored.
2325 */
2326static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2327{
2328 /*
2329 * Look up the GCPhys in the hash.
2330 */
2331 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2332 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2333 if (i == NIL_PGMPOOL_IDX)
2334 return NULL;
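/* Note: pool pages may record a sub-page GCPhys (e.g. PAE PDs shadowing quarters of a 32-bit
   PD), so any page whose GCPhys falls within this guest page is a candidate. */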
2335 do
2336 {
2337 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2338 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2339 && pPage != pNewPage)
2340 {
2341 switch (pPage->enmKind)
2342 {
2343 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2344 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2345 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2346 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2347 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2348 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2349 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2350 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2351 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2352 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2353 case PGMPOOLKIND_64BIT_PML4:
2354 case PGMPOOLKIND_32BIT_PD:
2355 case PGMPOOLKIND_PAE_PDPT:
2356 {
2357 /* find the head */
2358 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2359 {
2360 Assert(pPage->iMonitoredPrev != pPage->idx);
2361 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2362 }
2363 return pPage;
2364 }
2365
2366 /* ignore, no monitoring. */
2367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2368 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2369 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2370 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2371 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2372 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2373 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2374 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2375 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2376 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2377 case PGMPOOLKIND_ROOT_NESTED:
2378 case PGMPOOLKIND_PAE_PD_PHYS:
2379 case PGMPOOLKIND_PAE_PDPT_PHYS:
2380 case PGMPOOLKIND_32BIT_PD_PHYS:
2381 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2382 break;
2383 default:
2384 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2385 }
2386 }
2387
2388 /* next */
2389 i = pPage->iNext;
2390 } while (i != NIL_PGMPOOL_IDX);
2391 return NULL;
2392}
2393
2394
2395/**
2396 * Enables write monitoring of a guest page.
2397 *
2398 * @returns VBox status code.
2399 * @retval VINF_SUCCESS on success.
2400 * @param pPool The pool.
2401 * @param pPage The cached page.
2402 */
2403static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2404{
2405 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2406
2407 /*
2408 * Filter out the relevant kinds.
2409 */
2410 switch (pPage->enmKind)
2411 {
2412 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2413 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2414 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2415 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2416 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2417 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2418 case PGMPOOLKIND_64BIT_PML4:
2419 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2420 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2421 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2422 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2423 case PGMPOOLKIND_32BIT_PD:
2424 case PGMPOOLKIND_PAE_PDPT:
2425 break;
2426
2427 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2428 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2430 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2431 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2432 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2433 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2434 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2435 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2436 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2437 case PGMPOOLKIND_ROOT_NESTED:
2438 /* Nothing to monitor here. */
2439 return VINF_SUCCESS;
2440
2441 case PGMPOOLKIND_32BIT_PD_PHYS:
2442 case PGMPOOLKIND_PAE_PDPT_PHYS:
2443 case PGMPOOLKIND_PAE_PD_PHYS:
2444 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2445 /* Nothing to monitor here. */
2446 return VINF_SUCCESS;
2447 default:
2448 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2449 }
2450
2451 /*
2452 * Install handler.
2453 */
2454 int rc;
2455 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2456 if (pPageHead)
2457 {
2458 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2459 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2460
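/* Another pool page already monitors this guest page; link this page into the existing
   monitoring chain instead of registering a second physical access handler. */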
2461#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2462 if (pPageHead->fDirty)
2463 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2464#endif
2465
2466 pPage->iMonitoredPrev = pPageHead->idx;
2467 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2468 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2469 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2470 pPageHead->iMonitoredNext = pPage->idx;
2471 rc = VINF_SUCCESS;
2472 }
2473 else
2474 {
2475 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2476 PVMCC pVM = pPool->CTX_SUFF(pVM);
2477 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2478 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2479 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), NIL_RTRCPTR, NIL_RTR3PTR /*pszDesc*/);
2480 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2481 * the heap size should suffice. */
2482 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2483 PVMCPU pVCpu = VMMGetCpu(pVM);
2484 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2485 }
2486 pPage->fMonitored = true;
2487 return rc;
2488}
2489
2490
2491/**
2492 * Disables write monitoring of a guest page.
2493 *
2494 * @returns VBox status code.
2495 * @retval VINF_SUCCESS on success.
2496 * @param pPool The pool.
2497 * @param pPage The cached page.
2498 */
2499static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2500{
2501 /*
2502 * Filter out the relevant kinds.
2503 */
2504 switch (pPage->enmKind)
2505 {
2506 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2507 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2508 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2509 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2510 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2511 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2512 case PGMPOOLKIND_64BIT_PML4:
2513 case PGMPOOLKIND_32BIT_PD:
2514 case PGMPOOLKIND_PAE_PDPT:
2515 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2517 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2519 break;
2520
2521 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2522 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2523 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2524 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2525 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2526 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2527 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2528 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2529 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2530 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2531 case PGMPOOLKIND_ROOT_NESTED:
2532 case PGMPOOLKIND_PAE_PD_PHYS:
2533 case PGMPOOLKIND_PAE_PDPT_PHYS:
2534 case PGMPOOLKIND_32BIT_PD_PHYS:
2535 /* Nothing to monitor here. */
2536 Assert(!pPage->fMonitored);
2537 return VINF_SUCCESS;
2538
2539 default:
2540 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2541 }
2542 Assert(pPage->fMonitored);
2543
2544 /*
2545 * Remove the page from the monitored list or uninstall it if last.
2546 */
2547 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2548 int rc;
2549 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2550 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2551 {
2552 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2553 {
2554 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2555 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2556 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2557 MMHyperCCToR3(pVM, pNewHead), MMHyperCCToR0(pVM, pNewHead));
2558
2559 AssertFatalRCSuccess(rc);
2560 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2561 }
2562 else
2563 {
2564 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2565 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2566 {
2567 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2568 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2569 }
2570 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2571 rc = VINF_SUCCESS;
2572 }
2573 }
2574 else
2575 {
2576 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2577 AssertFatalRC(rc);
2578 PVMCPU pVCpu = VMMGetCpu(pVM);
2579 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2580 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2581 }
2582 pPage->fMonitored = false;
2583
2584 /*
2585 * Remove it from the list of modified pages (if in it).
2586 */
2587 pgmPoolMonitorModifiedRemove(pPool, pPage);
2588
2589 return rc;
2590}
2591
2592
2593/**
2594 * Inserts the page into the list of modified pages.
2595 *
2596 * @param pPool The pool.
2597 * @param pPage The page.
2598 */
2599void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2600{
2601 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2602 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2603 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2604 && pPool->iModifiedHead != pPage->idx,
2605 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2606 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2607 pPool->iModifiedHead, pPool->cModifiedPages));
2608
2609 pPage->iModifiedNext = pPool->iModifiedHead;
2610 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2611 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2612 pPool->iModifiedHead = pPage->idx;
2613 pPool->cModifiedPages++;
2614#ifdef VBOX_WITH_STATISTICS
2615 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2616 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2617#endif
2618}
2619
2620
2621/**
2622 * Removes the page from the list of modified pages and resets the
2623 * modification counter.
2624 *
2625 * @param pPool The pool.
2626 * @param pPage The page which is believed to be in the list of modified pages.
2627 */
2628static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2629{
2630 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2631 if (pPool->iModifiedHead == pPage->idx)
2632 {
2633 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2634 pPool->iModifiedHead = pPage->iModifiedNext;
2635 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2636 {
2637 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2638 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2639 }
2640 pPool->cModifiedPages--;
2641 }
2642 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2643 {
2644 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2645 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2646 {
2647 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2648 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2649 }
2650 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2651 pPool->cModifiedPages--;
2652 }
2653 else
2654 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2655 pPage->cModifications = 0;
2656}
2657
2658
2659/**
2660 * Zaps the list of modified pages, resetting their modification counters in the process.
2661 *
2662 * @param pVM The cross context VM structure.
2663 */
2664static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2665{
2666 PGM_LOCK_VOID(pVM);
2667 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2668 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2669
2670 unsigned cPages = 0; NOREF(cPages);
2671
2672#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2673 pgmPoolResetDirtyPages(pVM);
2674#endif
2675
2676 uint16_t idx = pPool->iModifiedHead;
2677 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2678 while (idx != NIL_PGMPOOL_IDX)
2679 {
2680 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2681 idx = pPage->iModifiedNext;
2682 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2683 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2684 pPage->cModifications = 0;
2685 Assert(++cPages);
2686 }
2687 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2688 pPool->cModifiedPages = 0;
2689 PGM_UNLOCK(pVM);
2690}
2691
2692
2693/**
2694 * Handle SyncCR3 pool tasks
2695 *
2696 * @returns VBox status code.
2697 * @retval VINF_SUCCESS if successfully handled.
2698 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2699 * @param pVCpu The cross context virtual CPU structure.
2700 * @remark Should only be used when monitoring is available, thus placed in
2701 * the PGMPOOL_WITH_MONITORING \#ifdef.
2702 */
2703int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2704{
2705 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2706 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2707
2708 /*
2709 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2710 * Occasionally we will have to clear all the shadow page tables because we wanted
2711 * to monitor a page which was mapped by too many shadowed page tables. This operation
2712 * is sometimes referred to as a 'lightweight flush'.
2713 */
2714# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2715 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2716 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2717# else /* !IN_RING3 */
2718 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2719 {
2720 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2721 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2722
2723 /* Make sure all other VCPUs return to ring 3. */
2724 if (pVM->cCpus > 1)
2725 {
2726 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2727 PGM_INVL_ALL_VCPU_TLBS(pVM);
2728 }
2729 return VINF_PGM_SYNC_CR3;
2730 }
2731# endif /* !IN_RING3 */
2732 else
2733 {
2734 pgmPoolMonitorModifiedClearAll(pVM);
2735
2736 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2737 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2738 {
2739 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2740 return pgmPoolSyncCR3(pVCpu);
2741 }
2742 }
2743 return VINF_SUCCESS;
2744}
2745
2746
2747/**
2748 * Frees up at least one user entry.
2749 *
2750 * @returns VBox status code.
2751 * @retval VINF_SUCCESS if successfully added.
2752 *
2753 * @param pPool The pool.
2754 * @param iUser The user index.
2755 */
2756static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2757{
2758 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2759 /*
2760 * Just free cached pages in a braindead fashion.
2761 */
2762 /** @todo walk the age list backwards and free the first with usage. */
2763 int rc = VINF_SUCCESS;
2764 do
2765 {
2766 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2767 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2768 rc = rc2;
2769 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2770 return rc;
2771}
2772
2773
2774/**
2775 * Inserts a page into the cache.
2776 *
2777 * This will create a user node for the page, insert it into the GCPhys
2778 * hash, and insert it into the age list.
2779 *
2780 * @returns VBox status code.
2781 * @retval VINF_SUCCESS if successfully added.
2782 *
2783 * @param pPool The pool.
2784 * @param pPage The cached page.
2785 * @param GCPhys The GC physical address of the page we're gonna shadow.
2786 * @param iUser The user index.
2787 * @param iUserTable The user table index.
2788 */
2789DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2790{
2791 int rc = VINF_SUCCESS;
2792 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2793
2794 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2795
2796 if (iUser != NIL_PGMPOOL_IDX)
2797 {
2798#ifdef VBOX_STRICT
2799 /*
2800 * Check that the entry doesn't already exist.
2801 */
2802 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2803 {
2804 uint16_t i = pPage->iUserHead;
2805 do
2806 {
2807 Assert(i < pPool->cMaxUsers);
2808 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2809 i = paUsers[i].iNext;
2810 } while (i != NIL_PGMPOOL_USER_INDEX);
2811 }
2812#endif
2813
2814 /*
2815 * Find a free user node.
2816 */
2817 uint16_t i = pPool->iUserFreeHead;
2818 if (i == NIL_PGMPOOL_USER_INDEX)
2819 {
2820 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2821 if (RT_FAILURE(rc))
2822 return rc;
2823 i = pPool->iUserFreeHead;
2824 }
2825
2826 /*
2827 * Unlink the user node from the free list,
2828 * initialize and insert it into the user list.
2829 */
2830 pPool->iUserFreeHead = paUsers[i].iNext;
2831 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2832 paUsers[i].iUser = iUser;
2833 paUsers[i].iUserTable = iUserTable;
2834 pPage->iUserHead = i;
2835 }
2836 else
2837 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2838
2839
2840 /*
2841 * Insert into cache and enable monitoring of the guest page if enabled.
2842 *
2843 * Until we implement caching of all levels, including the CR3 one, we'll
2844 * have to make sure we don't try to monitor & cache any recursive reuse of
2845 * a monitored CR3 page. Because all Windows versions are doing this we'll
2846 * have to be able to do combined access monitoring, CR3 + PT and
2847 * PD + PT (guest PAE).
2848 *
2849 * Update:
2850 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2851 */
2852 const bool fCanBeMonitored = true;
2853 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2854 if (fCanBeMonitored)
2855 {
2856 rc = pgmPoolMonitorInsert(pPool, pPage);
2857 AssertRC(rc);
2858 }
2859 return rc;
2860}
2861
2862
2863/**
2864 * Adds a user reference to a page.
2865 *
2866 * This will move the page to the head of the age list.
2867 *
2868 * @returns VBox status code.
2869 * @retval VINF_SUCCESS if successfully added.
2870 *
2871 * @param pPool The pool.
2872 * @param pPage The cached page.
2873 * @param iUser The user index.
2874 * @param iUserTable The user table.
2875 */
2876static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2877{
2878 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2879 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2880 Assert(iUser != NIL_PGMPOOL_IDX);
2881
2882# ifdef VBOX_STRICT
2883 /*
2884 * Check that the entry doesn't already exist. We only allow multiple
2885 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2886 */
2887 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2888 {
2889 uint16_t i = pPage->iUserHead;
2890 do
2891 {
2892 Assert(i < pPool->cMaxUsers);
2893 /** @todo this assertion looks odd... Shouldn't it be && here? */
2894 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2895 i = paUsers[i].iNext;
2896 } while (i != NIL_PGMPOOL_USER_INDEX);
2897 }
2898# endif
2899
2900 /*
2901 * Allocate a user node.
2902 */
2903 uint16_t i = pPool->iUserFreeHead;
2904 if (i == NIL_PGMPOOL_USER_INDEX)
2905 {
2906 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2907 if (RT_FAILURE(rc))
2908 return rc;
2909 i = pPool->iUserFreeHead;
2910 }
2911 pPool->iUserFreeHead = paUsers[i].iNext;
2912
2913 /*
2914 * Initialize the user node and insert it.
2915 */
2916 paUsers[i].iNext = pPage->iUserHead;
2917 paUsers[i].iUser = iUser;
2918 paUsers[i].iUserTable = iUserTable;
2919 pPage->iUserHead = i;
2920
2921# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2922 if (pPage->fDirty)
2923 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2924# endif
2925
2926 /*
2927 * Tell the cache to update its replacement stats for this page.
2928 */
2929 pgmPoolCacheUsed(pPool, pPage);
2930 return VINF_SUCCESS;
2931}
2932
2933
2934/**
2935 * Frees a user record associated with a page.
2936 *
2937 * This does not clear the entry in the user table, it simply replaces the
2938 * This does not clear the entry in the user table, it simply returns the
2939 * user record to the chain of free records.
2940 * @param pPool The pool.
2941 * @param pPage The shadow page.
2942 * @param iUser The shadow page pool index of the user table.
2943 * @param iUserTable The index into the user table (shadowed).
2944 *
2945 * @remarks Don't call this for root pages.
2946 */
2947static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2948{
2949 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2950 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2951 Assert(iUser != NIL_PGMPOOL_IDX);
2952
2953 /*
2954 * Unlink and free the specified user entry.
2955 */
2956
2957 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2958 uint16_t i = pPage->iUserHead;
2959 if ( i != NIL_PGMPOOL_USER_INDEX
2960 && paUsers[i].iUser == iUser
2961 && paUsers[i].iUserTable == iUserTable)
2962 {
2963 pPage->iUserHead = paUsers[i].iNext;
2964
2965 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2966 paUsers[i].iNext = pPool->iUserFreeHead;
2967 pPool->iUserFreeHead = i;
2968 return;
2969 }
2970
2971 /* General: Linear search. */
2972 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2973 while (i != NIL_PGMPOOL_USER_INDEX)
2974 {
2975 if ( paUsers[i].iUser == iUser
2976 && paUsers[i].iUserTable == iUserTable)
2977 {
2978 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2979 paUsers[iPrev].iNext = paUsers[i].iNext;
2980 else
2981 pPage->iUserHead = paUsers[i].iNext;
2982
2983 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2984 paUsers[i].iNext = pPool->iUserFreeHead;
2985 pPool->iUserFreeHead = i;
2986 return;
2987 }
2988 iPrev = i;
2989 i = paUsers[i].iNext;
2990 }
2991
2992 /* Fatal: didn't find it */
2993 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2994 iUser, iUserTable, pPage->GCPhys));
2995}
2996
2997
2998#if 0 /* unused */
2999/**
3000 * Gets the entry size of a shadow table.
3001 *
3002 * @param enmKind The kind of page.
3003 *
3004 * @returns The size of the entry in bytes. That is, 4 or 8.
3005 * @returns If the kind is not for a table, an assertion is raised and 0 is
3006 * returned.
3007 */
3008DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3009{
3010 switch (enmKind)
3011 {
3012 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3013 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3014 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3015 case PGMPOOLKIND_32BIT_PD:
3016 case PGMPOOLKIND_32BIT_PD_PHYS:
3017 return 4;
3018
3019 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3020 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3021 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3022 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3023 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3024 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3025 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3026 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3027 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3028 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3029 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3030 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3031 case PGMPOOLKIND_64BIT_PML4:
3032 case PGMPOOLKIND_PAE_PDPT:
3033 case PGMPOOLKIND_ROOT_NESTED:
3034 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3035 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3036 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3037 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3038 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3039 case PGMPOOLKIND_PAE_PD_PHYS:
3040 case PGMPOOLKIND_PAE_PDPT_PHYS:
3041 return 8;
3042
3043 default:
3044 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3045 }
3046}
3047#endif /* unused */
3048
3049#if 0 /* unused */
3050/**
3051 * Gets the entry size of a guest table.
3052 *
3053 * @param enmKind The kind of page.
3054 *
3055 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3056 * @returns If the kind is not for a table, an assertion is raised and 0 is
3057 * returned.
3058 */
3059DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3060{
3061 switch (enmKind)
3062 {
3063 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3064 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3065 case PGMPOOLKIND_32BIT_PD:
3066 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3067 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3068 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3069 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3070 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3071 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3072 return 4;
3073
3074 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3075 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3076 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3077 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3078 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3079 case PGMPOOLKIND_64BIT_PML4:
3080 case PGMPOOLKIND_PAE_PDPT:
3081 return 8;
3082
3083 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3084 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3085 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3086 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3087 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3088 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3089 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3090 case PGMPOOLKIND_ROOT_NESTED:
3091 case PGMPOOLKIND_PAE_PD_PHYS:
3092 case PGMPOOLKIND_PAE_PDPT_PHYS:
3093 case PGMPOOLKIND_32BIT_PD_PHYS:
3094 /** @todo can we return 0? (nobody is calling this...) */
3095 AssertFailed();
3096 return 0;
3097
3098 default:
3099 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3100 }
3101}
3102#endif /* unused */
3103
3104
3105/**
3106 * Checks one shadow page table entry for a mapping of a physical page.
3107 *
3108 * @returns true / false indicating removal of all relevant PTEs
3109 *
3110 * @param pVM The cross context VM structure.
3111 * @param pPhysPage The guest page in question.
3112 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3113 * @param iShw The pool index of the shadow page table.
3114 * @param iPte Page table entry index, or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown.
3115 */
3116static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3117{
3118 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3119 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3120 bool fRet = false;
3121
3122 /*
3123 * Assert sanity.
3124 */
3125 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3126 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3127 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3128
3129 /*
3130 * Then, clear the actual mappings to the page in the shadow PT.
3131 */
3132 switch (pPage->enmKind)
3133 {
3134 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3135 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3136 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3137 {
3138 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3139 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3140 uint32_t u32AndMask = 0;
3141 uint32_t u32OrMask = 0;
3142
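/* When the PTE may be kept (fFlushPTEs is false), only the RW bit is adjusted below according
   to the physical handler state: RW is restored if no handler is active and cleared if writes
   are monitored.  The default zero AND mask means the entry is zapped instead, and the present
   counters are updated accordingly. */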
3143 if (!fFlushPTEs)
3144 {
3145 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3146 {
3147 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3148 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3149 u32OrMask = X86_PTE_RW;
3150 u32AndMask = UINT32_MAX;
3151 fRet = true;
3152 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3153 break;
3154
3155 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3156 u32OrMask = 0;
3157 u32AndMask = ~X86_PTE_RW;
3158 fRet = true;
3159 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3160 break;
3161 default:
3162 /* (shouldn't be here, will assert below) */
3163 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3164 break;
3165 }
3166 }
3167 else
3168 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3169
3170 /* Update the counter if we're removing references. */
3171 if (!u32AndMask)
3172 {
3173 Assert(pPage->cPresent);
3174 Assert(pPool->cPresent);
3175 pPage->cPresent--;
3176 pPool->cPresent--;
3177 }
3178
3179 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3180 {
3181 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3182 X86PTE Pte;
3183 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3184 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3185 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3186 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3187 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3188 return fRet;
3189 }
3190#ifdef LOG_ENABLED
3191 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3192 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3193 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3194 {
3195 Log(("i=%d cFound=%d\n", i, ++cFound));
3196 }
3197#endif
3198 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3199 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3200 break;
3201 }
3202
3203 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3204 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3205 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3206 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3207 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3208 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3209 {
3210 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3211 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3212 uint64_t u64OrMask = 0;
3213 uint64_t u64AndMask = 0;
3214
3215 if (!fFlushPTEs)
3216 {
3217 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3218 {
3219 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3220 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3221 u64OrMask = X86_PTE_RW;
3222 u64AndMask = UINT64_MAX;
3223 fRet = true;
3224 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3225 break;
3226
3227 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3228 u64OrMask = 0;
3229 u64AndMask = ~(uint64_t)X86_PTE_RW;
3230 fRet = true;
3231 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3232 break;
3233
3234 default:
3235 /* (shouldn't be here, will assert below) */
3236 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3237 break;
3238 }
3239 }
3240 else
3241 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3242
3243 /* Update the counter if we're removing references. */
3244 if (!u64AndMask)
3245 {
3246 Assert(pPage->cPresent);
3247 Assert(pPool->cPresent);
3248 pPage->cPresent--;
3249 pPool->cPresent--;
3250 }
3251
3252 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3253 {
3254 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3255 X86PTEPAE Pte;
3256 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3257 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3258 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3259
3260 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3261 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3262 return fRet;
3263 }
3264#ifdef LOG_ENABLED
3265 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3266 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3267 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3268 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3269 Log(("i=%d cFound=%d\n", i, ++cFound));
3270#endif
3271 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3272 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3273 break;
3274 }
3275
3276#ifdef PGM_WITH_LARGE_PAGES
3277 /* Large page case only. */
3278 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3279 {
3280 Assert(pVM->pgm.s.fNestedPaging);
3281
3282 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3283 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3284
3285 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3286 {
3287 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3288 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3289 pPD->a[iPte].u = 0;
3290 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3291
3292 /* Update the counter as we're removing references. */
3293 Assert(pPage->cPresent);
3294 Assert(pPool->cPresent);
3295 pPage->cPresent--;
3296 pPool->cPresent--;
3297
3298 return fRet;
3299 }
3300# ifdef LOG_ENABLED
3301 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3302 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3303 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3304 Log(("i=%d cFound=%d\n", i, ++cFound));
3305# endif
3306 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3307 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3308 break;
3309 }
3310
3311 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3312 case PGMPOOLKIND_PAE_PD_PHYS:
3313 {
3314 Assert(pVM->pgm.s.fNestedPaging);
3315
3316 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3317 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3318
3319 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3320 {
3321 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3322 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3323 pPD->a[iPte].u = 0;
3324 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3325
3326 /* Update the counter as we're removing references. */
3327 Assert(pPage->cPresent);
3328 Assert(pPool->cPresent);
3329 pPage->cPresent--;
3330 pPool->cPresent--;
3331 return fRet;
3332 }
3333# ifdef LOG_ENABLED
3334 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3335 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3336 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3337 Log(("i=%d cFound=%d\n", i, ++cFound));
3338# endif
3339 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3340 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3341 break;
3342 }
3343#endif /* PGM_WITH_LARGE_PAGES */
3344
3345 default:
3346 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3347 }
3348
3349 /* not reached. */
3350#ifndef _MSC_VER
3351 return fRet;
3352#endif
3353}
3354
3355
3356/**
3357 * Scans one shadow page table for mappings of a physical page.
3358 *
3359 * @param pVM The cross context VM structure.
3360 * @param pPhysPage The guest page in question.
3361 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3362 * @param iShw The shadow page table.
3363 */
3364static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3365{
3366 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3367
3368 /* We should only come here when there's only one reference to this physical page. */
3369 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3370
3371 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3372 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3373 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3374 if (!fKeptPTEs)
3375 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3376 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3377}
3378
3379
3380/**
3381 * Flushes a list of shadow page tables mapping the same physical page.
3382 *
3383 * @param pVM The cross context VM structure.
3384 * @param pPhysPage The guest page in question.
3385 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3386 * @param iPhysExt The physical cross reference extent list to flush.
3387 */
3388static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3389{
3390 PGM_LOCK_ASSERT_OWNER(pVM);
3391 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3392 bool fKeepList = false;
3393
3394 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3395 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3396
3397 const uint16_t iPhysExtStart = iPhysExt;
3398 PPGMPOOLPHYSEXT pPhysExt;
3399 do
3400 {
3401 Assert(iPhysExt < pPool->cMaxPhysExts);
3402 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3403 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3404 {
3405 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3406 {
3407 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3408 if (!fKeptPTEs)
3409 {
3410 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3411 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3412 }
3413 else
3414 fKeepList = true;
3415 }
3416 }
3417 /* next */
3418 iPhysExt = pPhysExt->iNext;
3419 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3420
3421 if (!fKeepList)
3422 {
3423 /* insert the list into the free list and clear the ram range entry. */
3424 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3425 pPool->iPhysExtFreeHead = iPhysExtStart;
3426 /* Invalidate the tracking data. */
3427 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3428 }
3429
3430 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3431}
3432
3433
3434/**
3435 * Flushes all shadow page table mappings of the given guest page.
3436 *
3437 * This is typically called when the host page backing the guest one has been
3438 * replaced or when the page protection was changed due to a guest access
3439 * caught by the monitoring.
3440 *
3441 * @returns VBox status code.
3442 * @retval VINF_SUCCESS if all references have been successfully cleared.
3443 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3444 * pool cleaning. FF and sync flags are set.
3445 *
3446 * @param pVM The cross context VM structure.
3447 * @param GCPhysPage GC physical address of the page in question
3448 * @param pPhysPage The guest page in question.
3449 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3450 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3451 * flushed; it is NOT touched if this isn't necessary.
3452 * The caller MUST initialize this to @a false.
3453 */
3454int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3455{
3456 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3457 PGM_LOCK_VOID(pVM);
3458 int rc = VINF_SUCCESS;
3459
3460#ifdef PGM_WITH_LARGE_PAGES
3461 /* Is this page part of a large page? */
3462 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3463 {
3464 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3465 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3466
3467 /* Fetch the large page base. */
3468 PPGMPAGE pLargePage;
3469 if (GCPhysBase != GCPhysPage)
3470 {
3471 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3472 AssertFatal(pLargePage);
3473 }
3474 else
3475 pLargePage = pPhysPage;
3476
3477 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3478
3479 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3480 {
3481 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3482 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3483 pVM->pgm.s.cLargePagesDisabled++;
3484
3485 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3486 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3487
3488 *pfFlushTLBs = true;
3489 PGM_UNLOCK(pVM);
3490 return rc;
3491 }
3492 }
3493#else
3494 NOREF(GCPhysPage);
3495#endif /* PGM_WITH_LARGE_PAGES */
3496
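 /* The 16-bit tracking word packs a reference count and an index: a count of
    PGMPOOL_TD_CREFS_PHYSEXT means the index refers to a physical cross reference
    extent list (or is PGMPOOL_TD_IDX_OVERFLOWED when even that overflowed);
    otherwise there is a single reference and the index is the shadow page table. */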
3497 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3498 if (u16)
3499 {
3500 /*
3501 * The zero page is currently screwing up the tracking and we'll
3502 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3503 * is defined, zero pages won't normally be mapped. Some kind of solution
3504 * will be needed for this problem of course, but it will have to wait...
3505 */
3506 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3507 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3508 rc = VINF_PGM_GCPHYS_ALIASED;
3509 else
3510 {
3511 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3512 {
3513 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3514 pgmPoolTrackFlushGCPhysPT(pVM,
3515 pPhysPage,
3516 fFlushPTEs,
3517 PGMPOOL_TD_GET_IDX(u16));
3518 }
3519 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3520 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3521 else
3522 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3523 *pfFlushTLBs = true;
3524 }
3525 }
3526
3527 if (rc == VINF_PGM_GCPHYS_ALIASED)
3528 {
3529 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3530 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3531 rc = VINF_PGM_SYNC_CR3;
3532 }
3533 PGM_UNLOCK(pVM);
3534 return rc;
3535}
3536
3537
3538/**
3539 * Scans all shadow page tables for mappings of a physical page.
3540 *
3541 * This may be slow, but it's most likely more efficient than cleaning
3542 * out the entire page pool / cache.
3543 *
3544 * @returns VBox status code.
3545 * @retval VINF_SUCCESS if all references have been successfully cleared.
3546 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3547 * a page pool cleaning.
3548 *
3549 * @param pVM The cross context VM structure.
3550 * @param pPhysPage The guest page in question.
3551 */
3552int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3553{
3554 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3555 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3556 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3557 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3558
3559 /*
3560 * There is a limit to what makes sense.
3561 */
3562 if ( pPool->cPresent > 1024
3563 && pVM->cCpus == 1)
3564 {
3565 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3566 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3567 return VINF_PGM_GCPHYS_ALIASED;
3568 }
3569
3570 /*
3571 * Iterate all the pages until we've encountered all that are in use.
3572 * This is a simple but not quite optimal solution.
3573 */
3574 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3575 unsigned cLeft = pPool->cUsedPages;
3576 unsigned iPage = pPool->cCurPages;
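 /* Walk the pool from the top, stopping once we've examined as many in-use pages
    as cUsedPages says there are; within each page table we stop after all
    cPresent present entries have been seen. */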
3577 while (--iPage >= PGMPOOL_IDX_FIRST)
3578 {
3579 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3580 if ( pPage->GCPhys != NIL_RTGCPHYS
3581 && pPage->cPresent)
3582 {
3583 switch (pPage->enmKind)
3584 {
3585 /*
3586 * We only care about shadow page tables.
3587 */
3588 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3589 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3590 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3591 {
3592 const uint32_t u32 = (uint32_t)u64;
3593 unsigned cPresent = pPage->cPresent;
3594 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3595 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3596 {
3597 const X86PGUINT uPte = pPT->a[i].u;
3598 if (uPte & X86_PTE_P)
3599 {
3600 if ((uPte & X86_PTE_PG_MASK) == u32)
3601 {
3602 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3603 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3604
3605 /* Update the counter as we're removing references. */
3606 Assert(pPage->cPresent);
3607 Assert(pPool->cPresent);
3608 pPage->cPresent--;
3609 pPool->cPresent--;
3610 }
3611 if (!--cPresent)
3612 break;
3613 }
3614 }
3615 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3616 break;
3617 }
3618
3619 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3620 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3621 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3622 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3623 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3624 {
3625 unsigned cPresent = pPage->cPresent;
3626 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3627 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3628 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3629 {
3630 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3631 {
3632 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3633 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0);
3634
3635 /* Update the counter as we're removing references. */
3636 Assert(pPage->cPresent);
3637 Assert(pPool->cPresent);
3638 pPage->cPresent--;
3639 pPool->cPresent--;
3640 }
3641 if (!--cPresent)
3642 break;
3643 }
3644 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3645 break;
3646 }
3647
3648 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3649 {
3650 unsigned cPresent = pPage->cPresent;
3651 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3652 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3653 {
3654 X86PGPAEUINT const uPte = pPT->a[i].u;
3655 if (uPte & EPT_E_READ)
3656 {
3657 if ((uPte & EPT_PTE_PG_MASK) == u64)
3658 {
3659 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3660 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3661
3662 /* Update the counter as we're removing references. */
3663 Assert(pPage->cPresent);
3664 Assert(pPool->cPresent);
3665 pPage->cPresent--;
3666 pPool->cPresent--;
3667 }
3668 if (!--cPresent)
3669 break;
3670 }
3671 }
3672 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3673 break;
3674 }
3675 }
3676
3677 if (!--cLeft)
3678 break;
3679 }
3680 }
3681
3682 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3683 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3684
3685 /*
3686 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3687 */
3688 if (pPool->cPresent > 1024)
3689 {
3690 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3691 return VINF_PGM_GCPHYS_ALIASED;
3692 }
3693
3694 return VINF_SUCCESS;
3695}
3696
3697
3698/**
3699 * Clears the user entry in a user table.
3700 *
3701 * This is used to remove all references to a page when flushing it.
3702 */
3703static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3704{
3705 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3706 Assert(pUser->iUser < pPool->cCurPages);
3707 uint32_t iUserTable = pUser->iUserTable;
3708
3709 /*
3710 * Map the user page. Ignore references made by fictitious pages.
3711 */
3712 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3713 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3714 union
3715 {
3716 uint64_t *pau64;
3717 uint32_t *pau32;
3718 } u;
3719 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3720 {
3721 Assert(!pUserPage->pvPageR3);
3722 return;
3723 }
3724 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3725
3726
3727 /* Safety precaution in case we change the paging for other modes too in the future. */
3728 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3729
3730#ifdef VBOX_STRICT
3731 /*
3732 * Some sanity checks.
3733 */
3734 switch (pUserPage->enmKind)
3735 {
3736 case PGMPOOLKIND_32BIT_PD:
3737 case PGMPOOLKIND_32BIT_PD_PHYS:
3738 Assert(iUserTable < X86_PG_ENTRIES);
3739 break;
3740 case PGMPOOLKIND_PAE_PDPT:
3741 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3742 case PGMPOOLKIND_PAE_PDPT_PHYS:
3743 Assert(iUserTable < 4);
3744 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3745 break;
3746 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3747 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3748 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3749 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3750 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3751 case PGMPOOLKIND_PAE_PD_PHYS:
3752 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3753 break;
3754 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3755 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3756 break;
3757 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3758 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3759 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3760 break;
3761 case PGMPOOLKIND_64BIT_PML4:
3762 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3763 /* GCPhys >> PAGE_SHIFT is the index here */
3764 break;
3765 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3766 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3767 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3768 break;
3769
3770 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3771 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3772 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3773 break;
3774
3775 case PGMPOOLKIND_ROOT_NESTED:
3776 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3777 break;
3778
3779 default:
3780 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3781 break;
3782 }
3783#endif /* VBOX_STRICT */
3784
3785 /*
3786 * Clear the entry in the user page.
3787 */
3788 switch (pUserPage->enmKind)
3789 {
3790 /* 32-bit entries */
3791 case PGMPOOLKIND_32BIT_PD:
3792 case PGMPOOLKIND_32BIT_PD_PHYS:
3793 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3794 break;
3795
3796 /* 64-bit entries */
3797 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3798 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3799 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3800 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3801 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3802 case PGMPOOLKIND_PAE_PD_PHYS:
3803 case PGMPOOLKIND_PAE_PDPT_PHYS:
3804 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3805 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3806 case PGMPOOLKIND_64BIT_PML4:
3807 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3808 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3809 case PGMPOOLKIND_PAE_PDPT:
3810 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3811 case PGMPOOLKIND_ROOT_NESTED:
3812 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3813 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3814 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3815 break;
3816
3817 default:
3818 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3819 }
3820 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3821}
3822
3823
3824/**
3825 * Clears all users of a page.
3826 */
3827static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3828{
3829 /*
3830 * Free all the user records.
3831 */
3832 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3833
3834 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3835 uint16_t i = pPage->iUserHead;
3836 while (i != NIL_PGMPOOL_USER_INDEX)
3837 {
3838 /* Clear the entry in the user table. */
3839 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3840
3841 /* Free it. */
3842 const uint16_t iNext = paUsers[i].iNext;
3843 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3844 paUsers[i].iNext = pPool->iUserFreeHead;
3845 pPool->iUserFreeHead = i;
3846
3847 /* Next. */
3848 i = iNext;
3849 }
3850 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3851}
3852
3853
3854/**
3855 * Allocates a new physical cross reference extent.
3856 *
3857 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3858 * @param pVM The cross context VM structure.
3859 * @param piPhysExt Where to store the phys ext index.
3860 */
3861PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3862{
3863 PGM_LOCK_ASSERT_OWNER(pVM);
3864 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3865 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3866 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3867 {
3868 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3869 return NULL;
3870 }
3871 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3872 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3873 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3874 *piPhysExt = iPhysExt;
3875 return pPhysExt;
3876}
3877
3878
3879/**
3880 * Frees a physical cross reference extent.
3881 *
3882 * @param pVM The cross context VM structure.
3883 * @param iPhysExt The extent to free.
3884 */
3885void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3886{
3887 PGM_LOCK_ASSERT_OWNER(pVM);
3888 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3889 Assert(iPhysExt < pPool->cMaxPhysExts);
3890 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3891 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3892 {
3893 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3894 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3895 }
3896 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3897 pPool->iPhysExtFreeHead = iPhysExt;
3898}
3899
3900
3901/**
3902 * Frees a whole list of physical cross reference extents.
3903 *
3904 * @param pVM The cross context VM structure.
3905 * @param iPhysExt The index of the first extent in the list to free.
3906 */
3907void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3908{
3909 PGM_LOCK_ASSERT_OWNER(pVM);
3910 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3911
3912 const uint16_t iPhysExtStart = iPhysExt;
3913 PPGMPOOLPHYSEXT pPhysExt;
3914 do
3915 {
3916 Assert(iPhysExt < pPool->cMaxPhysExts);
3917 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3918 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3919 {
3920 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3921 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3922 }
3923
3924 /* next */
3925 iPhysExt = pPhysExt->iNext;
3926 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3927
3928 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3929 pPool->iPhysExtFreeHead = iPhysExtStart;
3930}
3931
3932
3933/**
3934 * Insert a reference into a list of physical cross reference extents.
3935 *
3936 * @returns The new tracking data for PGMPAGE.
3937 *
3938 * @param pVM The cross context VM structure.
3939 * @param iPhysExt The physical extent index of the list head.
3940 * @param iShwPT The shadow page table index.
3941 * @param iPte Page table entry
3942 *
3943 */
3944static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3945{
3946 PGM_LOCK_ASSERT_OWNER(pVM);
3947 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3948 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
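 /* Each extent holds up to three (shadow page index, PTE index) pairs and the
    extents are chained via iNext. The common cases below fill slot 1 or 2 of the
    head extent; the general walk gives up after 15 extents and returns the
    overflowed marker. */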
3949
3950 /*
3951 * Special common cases.
3952 */
3953 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3954 {
3955 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3956 paPhysExts[iPhysExt].apte[1] = iPte;
3957 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3958 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3959 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3960 }
3961 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3962 {
3963 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3964 paPhysExts[iPhysExt].apte[2] = iPte;
3965 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3966 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3967 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3968 }
3969 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3970
3971 /*
3972 * General treatment.
3973 */
3974 const uint16_t iPhysExtStart = iPhysExt;
3975 unsigned cMax = 15;
3976 for (;;)
3977 {
3978 Assert(iPhysExt < pPool->cMaxPhysExts);
3979 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3980 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3981 {
3982 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3983 paPhysExts[iPhysExt].apte[i] = iPte;
3984 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3985 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3986 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3987 }
3988 if (!--cMax)
3989 {
3990 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
3991 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3992 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3993 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3994 }
3995
3996 /* advance */
3997 iPhysExt = paPhysExts[iPhysExt].iNext;
3998 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3999 break;
4000 }
4001
4002 /*
4003 * Add another extent to the list.
4004 */
4005 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4006 if (!pNew)
4007 {
4008 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4009 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4010 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4011 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4012 }
4013 pNew->iNext = iPhysExtStart;
4014 pNew->aidx[0] = iShwPT;
4015 pNew->apte[0] = iPte;
4016 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4017 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4018}
4019
4020
4021/**
4022 * Add a reference to a guest physical page where extents are in use.
4023 *
4024 * @returns The new tracking data for PGMPAGE.
4025 *
4026 * @param pVM The cross context VM structure.
4027 * @param pPhysPage Pointer to the aPages entry in the ram range.
4028 * @param u16 The ram range flags (top 16-bits).
4029 * @param iShwPT The shadow page table index.
4030 * @param iPte Page table entry
4031 */
4032uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4033{
4034 PGM_LOCK_VOID(pVM);
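 /* Three cases: a page with a single reference gets converted into a two entry
    extent list; a page already using an extent list gets the new reference
    inserted; a page marked as overflowed just has the event counted. */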
4035 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4036 {
4037 /*
4038 * Convert to extent list.
4039 */
4040 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4041 uint16_t iPhysExt;
4042 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4043 if (pPhysExt)
4044 {
4045 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4046 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4047 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4048 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4049 pPhysExt->aidx[1] = iShwPT;
4050 pPhysExt->apte[1] = iPte;
4051 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4052 }
4053 else
4054 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4055 }
4056 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4057 {
4058 /*
4059 * Insert into the extent list.
4060 */
4061 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4062 }
4063 else
4064 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4065 PGM_UNLOCK(pVM);
4066 return u16;
4067}
4068
4069
4070/**
4071 * Clear references to guest physical memory.
4072 *
4073 * @param pPool The pool.
4074 * @param pPage The page.
4075 * @param pPhysPage Pointer to the aPages entry in the ram range.
4076 * @param iPte Shadow PTE index
4077 */
4078void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4079{
4080 PVMCC pVM = pPool->CTX_SUFF(pVM);
4081 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4082 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4083
4084 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4085 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4086 {
4087 PGM_LOCK_VOID(pVM);
4088
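 /* Walk the extent list, clear the matching (shadow page, PTE) slot, and if the
    extent ends up empty unlink and free it (lonely / head / middle node cases
    below). */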
4089 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4090 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4091 do
4092 {
4093 Assert(iPhysExt < pPool->cMaxPhysExts);
4094
4095 /*
4096 * Look for the shadow page and check if it's all freed.
4097 */
4098 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4099 {
4100 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4101 && paPhysExts[iPhysExt].apte[i] == iPte)
4102 {
4103 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4104 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4105
4106 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4107 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4108 {
4109 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4110 PGM_UNLOCK(pVM);
4111 return;
4112 }
4113
4114 /* we can free the node. */
4115 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4116 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4117 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4118 {
4119 /* lonely node */
4120 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4121 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4122 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4123 }
4124 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4125 {
4126 /* head */
4127 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4128 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4129 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4130 }
4131 else
4132 {
4133 /* in list */
4134 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4135 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4136 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4137 }
4138 iPhysExt = iPhysExtNext;
4139 PGM_UNLOCK(pVM);
4140 return;
4141 }
4142 }
4143
4144 /* next */
4145 iPhysExtPrev = iPhysExt;
4146 iPhysExt = paPhysExts[iPhysExt].iNext;
4147 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4148
4149 PGM_UNLOCK(pVM);
4150 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4151 }
4152 else /* nothing to do */
4153 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4154}
4155
4156/**
4157 * Clear references to guest physical memory.
4158 *
4159 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4160 * physical address is assumed to be correct, so the linear search can be
4161 * skipped and we can assert at an earlier point.
4162 *
4163 * @param pPool The pool.
4164 * @param pPage The page.
4165 * @param HCPhys The host physical address corresponding to the guest page.
4166 * @param GCPhys The guest physical address corresponding to HCPhys.
4167 * @param iPte Shadow PTE index
4168 */
4169static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4170{
4171 /*
4172 * Lookup the page and check if it checks out before derefing it.
4173 */
4174 PVMCC pVM = pPool->CTX_SUFF(pVM);
4175 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4176 if (pPhysPage)
4177 {
4178 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4179#ifdef LOG_ENABLED
4180 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4181 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4182#endif
4183 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4184 {
4185 Assert(pPage->cPresent);
4186 Assert(pPool->cPresent);
4187 pPage->cPresent--;
4188 pPool->cPresent--;
4189 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4190 return;
4191 }
4192
4193 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4194 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4195 }
4196 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4197}
4198
4199
4200/**
4201 * Clear references to guest physical memory.
4202 *
4203 * @param pPool The pool.
4204 * @param pPage The page.
4205 * @param HCPhys The host physical address corresponding to the guest page.
4206 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4207 * @param iPte Shadow pte index
4208 */
4209void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4210{
4211 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4212
4213 /*
4214 * Try the hint first.
4215 */
4216 RTHCPHYS HCPhysHinted;
4217 PVMCC pVM = pPool->CTX_SUFF(pVM);
4218 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4219 if (pPhysPage)
4220 {
4221 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4222 Assert(HCPhysHinted);
4223 if (HCPhysHinted == HCPhys)
4224 {
4225 Assert(pPage->cPresent);
4226 Assert(pPool->cPresent);
4227 pPage->cPresent--;
4228 pPool->cPresent--;
4229 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4230 return;
4231 }
4232 }
4233 else
4234 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4235
4236 /*
4237 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4238 */
4239 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4240 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4241 while (pRam)
4242 {
4243 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4244 while (iPage-- > 0)
4245 {
4246 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4247 {
4248 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4249 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4250 Assert(pPage->cPresent);
4251 Assert(pPool->cPresent);
4252 pPage->cPresent--;
4253 pPool->cPresent--;
4254 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4255 return;
4256 }
4257 }
4258 pRam = pRam->CTX_SUFF(pNext);
4259 }
4260
4261 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4262}
4263
4264
4265/**
4266 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4267 *
4268 * @param pPool The pool.
4269 * @param pPage The page.
4270 * @param pShwPT The shadow page table (mapping of the page).
4271 * @param pGstPT The guest page table.
4272 */
4273DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4274{
4275 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4276 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4277 {
4278 const X86PGUINT uPte = pShwPT->a[i].u;
4279 Assert(!(uPte & RT_BIT_32(10)));
4280 if (uPte & X86_PTE_P)
4281 {
4282 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4283 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4284 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4285 if (!pPage->cPresent)
4286 break;
4287 }
4288 }
4289}
4290
4291
4292/**
4293 * Clear references to guest physical memory in a PAE / 32-bit page table.
4294 *
4295 * @param pPool The pool.
4296 * @param pPage The page.
4297 * @param pShwPT The shadow page table (mapping of the page).
4298 * @param pGstPT The guest page table (just a half one).
4299 */
4300DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4301{
4302 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4303 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4304 {
4305 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4306 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4307 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4308 {
4309 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4310 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4311 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4312 if (!pPage->cPresent)
4313 break;
4314 }
4315 }
4316}
4317
4318
4319/**
4320 * Clear references to guest physical memory in a PAE / PAE page table.
4321 *
4322 * @param pPool The pool.
4323 * @param pPage The page.
4324 * @param pShwPT The shadow page table (mapping of the page).
4325 * @param pGstPT The guest page table.
4326 */
4327DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4328{
4329 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4330 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4331 {
4332 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4333 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4334 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4335 {
4336 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4337 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4338 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4339 if (!pPage->cPresent)
4340 break;
4341 }
4342 }
4343}
4344
4345
4346/**
4347 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4348 *
4349 * @param pPool The pool.
4350 * @param pPage The page.
4351 * @param pShwPT The shadow page table (mapping of the page).
4352 */
4353DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4354{
4355 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4356 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4357 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4358 {
4359 const X86PGUINT uPte = pShwPT->a[i].u;
4360 Assert(!(uPte & RT_BIT_32(10)));
4361 if (uPte & X86_PTE_P)
4362 {
4363 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4364 i, uPte & X86_PTE_PG_MASK, GCPhys));
4365 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4366 if (!pPage->cPresent)
4367 break;
4368 }
4369 }
4370}
4371
4372
4373/**
4374 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4375 *
4376 * @param pPool The pool.
4377 * @param pPage The page.
4378 * @param pShwPT The shadow page table (mapping of the page).
4379 */
4380DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4381{
4382 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4383 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4384 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4385 {
4386 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4387 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4388 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4389 {
4390 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4391 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4392 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4393 if (!pPage->cPresent)
4394 break;
4395 }
4396 }
4397}
4398
4399
4400/**
4401 * Clear references to shadowed pages in an EPT page table.
4402 *
4403 * @param pPool The pool.
4404 * @param pPage The page.
4405 * @param pShwPT The shadow page table (mapping of the
4406 * page).
4407 */
4408DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4409{
4410 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4411 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4412 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4413 {
4414 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4415 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4416 if (uPte & EPT_E_READ)
4417 {
4418 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4419 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4420 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4421 if (!pPage->cPresent)
4422 break;
4423 }
4424 }
4425}
4426
4427
4428/**
4429 * Clear references to shadowed pages in a 32-bit page directory.
4430 *
4431 * @param pPool The pool.
4432 * @param pPage The page.
4433 * @param pShwPD The shadow page directory (mapping of the page).
4434 */
4435DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4436{
4437 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4438 {
4439 X86PGUINT const uPde = pShwPD->a[i].u;
4440 if (uPde & X86_PDE_P)
4441 {
4442 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4443 if (pSubPage)
4444 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4445 else
4446 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4447 }
4448 }
4449}
4450
4451
4452/**
4453 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4454 *
4455 * @param pPool The pool.
4456 * @param pPage The page.
4457 * @param pShwPD The shadow page directory (mapping of the page).
4458 */
4459DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4460{
4461 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4462 {
4463 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4464 if (uPde & X86_PDE_P)
4465 {
4466#ifdef PGM_WITH_LARGE_PAGES
4467 if (uPde & X86_PDE_PS)
4468 {
4469 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4470 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4471 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4472 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4473 i);
4474 }
4475 else
4476#endif
4477 {
4478 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4479 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4480 if (pSubPage)
4481 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4482 else
4483 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4484 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4485 }
4486 }
4487 }
4488}
4489
4490
4491/**
4492 * Clear references to shadowed pages in a PAE page directory pointer table.
4493 *
4494 * @param pPool The pool.
4495 * @param pPage The page.
4496 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4497 */
4498DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4499{
4500 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4501 {
4502 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4503 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4504 if (uPdpe & X86_PDPE_P)
4505 {
4506 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4507 if (pSubPage)
4508 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4509 else
4510 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4511 }
4512 }
4513}
4514
4515
4516/**
4517 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4518 *
4519 * @param pPool The pool.
4520 * @param pPage The page.
4521 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4522 */
4523DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4524{
4525 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4526 {
4527 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4528 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4529 if (uPdpe & X86_PDPE_P)
4530 {
4531 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4532 if (pSubPage)
4533 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4534 else
4535 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4536 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4537 }
4538 }
4539}
4540
4541
4542/**
4543 * Clear references to shadowed pages in a 64-bit level 4 page table.
4544 *
4545 * @param pPool The pool.
4546 * @param pPage The page.
4547 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4548 */
4549DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4550{
4551 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4552 {
4553 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4554 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4555 if (uPml4e & X86_PML4E_P)
4556 {
4557 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4558 if (pSubPage)
4559 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4560 else
4561 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4562 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4563 }
4564 }
4565}
4566
4567
4568/**
4569 * Clear references to shadowed pages in an EPT page directory.
4570 *
4571 * @param pPool The pool.
4572 * @param pPage The page.
4573 * @param pShwPD The shadow page directory (mapping of the page).
4574 */
4575DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4576{
4577 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4578 {
4579 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4580 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4581 if (uPde & EPT_E_READ)
4582 {
4583#ifdef PGM_WITH_LARGE_PAGES
4584 if (uPde & EPT_E_LEAF)
4585 {
4586 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4587 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4588 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4589 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4590 i);
4591 }
4592 else
4593#endif
4594 {
4595 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4596 if (pSubPage)
4597 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4598 else
4599 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4600 }
4601 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4602 }
4603 }
4604}
4605
4606
4607/**
4608 * Clear references to shadowed pages in an EPT page directory pointer table.
4609 *
4610 * @param pPool The pool.
4611 * @param pPage The page.
4612 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4613 */
4614DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4615{
4616 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4617 {
4618 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4619 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4620 if (uPdpe & EPT_E_READ)
4621 {
4622 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4623 if (pSubPage)
4624 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4625 else
4626 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4627 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4628 }
4629 }
4630}
4631
4632
4633/**
4634 * Clears all references made by this page.
4635 *
4636 * This includes other shadow pages and GC physical addresses.
4637 *
4638 * @param pPool The pool.
4639 * @param pPage The page.
4640 */
4641static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4642{
4643 /*
4644 * Map the shadow page and take action according to the page kind.
4645 */
4646 PVMCC pVM = pPool->CTX_SUFF(pVM);
4647 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4648 switch (pPage->enmKind)
4649 {
4650 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4651 {
4652 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4653 void *pvGst;
4654 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4655 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4656 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4657 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4658 break;
4659 }
4660
4661 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4662 {
4663 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4664 void *pvGst;
4665 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4666 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4667 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4668 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4669 break;
4670 }
4671
4672 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4673 {
4674 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4675 void *pvGst;
4676 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4677 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4679 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4680 break;
4681 }
4682
4683 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4684 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4685 {
4686 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4687 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4688 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4689 break;
4690 }
4691
4692 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4693 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4694 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4695 {
4696 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4697 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4698 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4699 break;
4700 }
4701
4702 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4703 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4704 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4705 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4706 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4707 case PGMPOOLKIND_PAE_PD_PHYS:
4708 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4709 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4710 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4711 break;
4712
4713 case PGMPOOLKIND_32BIT_PD_PHYS:
4714 case PGMPOOLKIND_32BIT_PD:
4715 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4716 break;
4717
4718 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4719 case PGMPOOLKIND_PAE_PDPT:
4720 case PGMPOOLKIND_PAE_PDPT_PHYS:
4721 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4722 break;
4723
4724 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4725 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4726 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4727 break;
4728
4729 case PGMPOOLKIND_64BIT_PML4:
4730 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4731 break;
4732
4733 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4734 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4735 break;
4736
4737 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4738 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4739 break;
4740
4741 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4742 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4743 break;
4744
4745 default:
4746 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4747 }
4748
4749 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4750 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4751 ASMMemZeroPage(pvShw);
4752 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4753 pPage->fZeroed = true;
4754 Assert(!pPage->cPresent);
4755 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4756}
4757
4758
4759/**
4760 * Flushes a pool page.
4761 *
4762 * This moves the page to the free list after removing all user references to it.
4763 *
4764 * @returns VBox status code.
4765 * @retval VINF_SUCCESS on success.
4766 * @param pPool The pool.
4767 * @param pPage The shadow page.
4768 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4769 */
4770int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4771{
4772 PVMCC pVM = pPool->CTX_SUFF(pVM);
4773 bool fFlushRequired = false;
4774
4775 int rc = VINF_SUCCESS;
4776 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4777 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4778 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4779
4780 /*
4781 * Reject any attempts at flushing any of the special root pages (shall
4782 * not happen).
4783 */
4784 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4785 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4786 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4787 VINF_SUCCESS);
4788
4789 PGM_LOCK_VOID(pVM);
4790
4791 /*
4792 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4793 */
4794 if (pgmPoolIsPageLocked(pPage))
4795 {
4796 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4797 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4798 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4799 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4800 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4801 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4802 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4803 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4804 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4805 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4806 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4807 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4808 PGM_UNLOCK(pVM);
4809 return VINF_SUCCESS;
4810 }
4811
4812 /*
4813 * Mark the page as being in need of an ASMMemZeroPage().
4814 */
4815 pPage->fZeroed = false;
4816
4817#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4818 if (pPage->fDirty)
4819 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4820#endif
4821
4822 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4823 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4824 fFlushRequired = true;
4825
4826 /*
4827 * Clear the page.
4828 */
4829 pgmPoolTrackClearPageUsers(pPool, pPage);
4830 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4831 pgmPoolTrackDeref(pPool, pPage);
4832 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4833
4834 /*
4835 * Flush it from the cache.
4836 */
4837 pgmPoolCacheFlushPage(pPool, pPage);
4838
4839 /*
4840 * Deregister the monitoring.
4841 */
4842 if (pPage->fMonitored)
4843 rc = pgmPoolMonitorFlush(pPool, pPage);
4844
4845 /*
4846 * Free the page.
4847 */
4848 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4849 pPage->iNext = pPool->iFreeHead;
4850 pPool->iFreeHead = pPage->idx;
4851 pPage->enmKind = PGMPOOLKIND_FREE;
4852 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4853 pPage->GCPhys = NIL_RTGCPHYS;
4854 pPage->fReusedFlushPending = false;
4855
4856 pPool->cUsedPages--;
4857
4858 /* Flush the TLBs of all VCPUs if required. */
4859 if ( fFlushRequired
4860 && fFlush)
4861 {
4862 PGM_INVL_ALL_VCPU_TLBS(pVM);
4863 }
4864
4865 PGM_UNLOCK(pVM);
4866 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4867 return rc;
4868}
4869
4870
4871/**
4872 * Frees a usage of a pool page.
4873 *
4874 * The caller is responsible for updating the user table so that it no longer
4875 * references the shadow page.
4876 *
4877 * @param pPool The pool.
4878 * @param pPage The shadow page.
4879 * @param iUser The shadow page pool index of the user table.
4880 * NIL_PGMPOOL_IDX for root pages.
4881 * @param iUserTable The index into the user table (shadowed). Ignored if
4882 * root page.
4883 */
4884void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4885{
4886 PVMCC pVM = pPool->CTX_SUFF(pVM);
4887
4888 STAM_PROFILE_START(&pPool->StatFree, a);
4889 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4890 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4891 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4892
4893 PGM_LOCK_VOID(pVM);
4894 if (iUser != NIL_PGMPOOL_IDX)
4895 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
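/* Pages that aren't in the cache cannot be reused, so flush (and free) them right away. */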
4896 if (!pPage->fCached)
4897 pgmPoolFlushPage(pPool, pPage);
4898 PGM_UNLOCK(pVM);
4899 STAM_PROFILE_STOP(&pPool->StatFree, a);
4900}
4901
4902
4903/**
4904 * Makes one or more free pages available, growing the pool or evicting a cached page as needed.
4905 *
4906 * @returns VBox status code.
4907 * @retval VINF_SUCCESS on success.
4908 *
4909 * @param pPool The pool.
4910 * @param enmKind Page table kind
4911 * @param iUser The user of the page.
4912 */
4913static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4914{
4915 PVMCC pVM = pPool->CTX_SUFF(pVM);
4916 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4917 NOREF(enmKind);
4918
4919 /*
4920 * If the pool isn't fully grown yet, expand it.
4921 */
4922 if (pPool->cCurPages < pPool->cMaxPages)
4923 {
4924 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4925#ifdef IN_RING3
4926 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4927#else
4928 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4929#endif
4930 if (RT_FAILURE(rc))
4931 return rc;
4932 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
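/* Growing should have produced free pages; if it somehow didn't, fall through and evict a cached page below. */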
4933 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4934 return VINF_SUCCESS;
4935 }
4936
4937 /*
4938 * Free one cached page.
4939 */
4940 return pgmPoolCacheFreeOne(pPool, iUser);
4941}
4942
4943
4944/**
4945 * Allocates a page from the pool.
4946 *
4947 * This page may actually be a cached page and not in need of any processing
4948 * on the caller's part.
4949 *
4950 * @returns VBox status code.
4951 * @retval VINF_SUCCESS if a NEW page was allocated.
4952 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4953 *
4954 * @param pVM The cross context VM structure.
4955 * @param GCPhys The GC physical address of the page we're going to shadow.
4956 * For 4MB and 2MB PD entries, it's the first address the
4957 * shadow PT is covering.
4958 * @param enmKind The kind of mapping.
4959 * @param enmAccess Access type for the mapping (only relevant for big pages)
4960 * @param fA20Enabled Whether the A20 gate is enabled or not.
4961 * @param iUser The shadow page pool index of the user table. Root
4962 * pages should pass NIL_PGMPOOL_IDX.
4963 * @param iUserTable The index into the user table (shadowed). Ignored for
4964 * root pages (iUser == NIL_PGMPOOL_IDX).
4965 * @param fLockPage Lock the page
4966 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
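*
* @remarks Illustrative call sketch only; pShwPde, iPde and GCPhysPt are
*          hypothetical names and the real callers live in the paging mode
*          template code:
* @code
*      PPGMPOOLPAGE pShwPage = NULL;
*      int rc = pgmPoolAlloc(pVM, GCPhysPt, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
*                            true, pShwPde->idx, iPde, false, &pShwPage); // fA20Enabled=true, fLockPage=false
*      if (RT_SUCCESS(rc))    // VINF_SUCCESS (new page) or VINF_PGM_CACHED_PAGE (reused)
*          rc = VINF_SUCCESS; // pShwPage->Core.Key is the HC physical address for the shadow PDE
* @endcode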
4967 */
4968int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4969 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4970{
4971 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4972 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4973 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4974 *ppPage = NULL;
4975 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4976 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4977 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4978
4979 PGM_LOCK_VOID(pVM);
4980
4981 if (pPool->fCacheEnabled)
4982 {
4983 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4984 if (RT_SUCCESS(rc2))
4985 {
4986 if (fLockPage)
4987 pgmPoolLockPage(pPool, *ppPage);
4988 PGM_UNLOCK(pVM);
4989 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4990 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4991 return rc2;
4992 }
4993 }
4994
4995 /*
4996 * Allocate a new one.
4997 */
4998 int rc = VINF_SUCCESS;
4999 uint16_t iNew = pPool->iFreeHead;
5000 if (iNew == NIL_PGMPOOL_IDX)
5001 {
5002 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5003 if (RT_FAILURE(rc))
5004 {
5005 PGM_UNLOCK(pVM);
5006 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5007 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5008 return rc;
5009 }
5010 iNew = pPool->iFreeHead;
5011 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5012 }
5013
5014 /* unlink the free head */
5015 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5016 pPool->iFreeHead = pPage->iNext;
5017 pPage->iNext = NIL_PGMPOOL_IDX;
5018
5019 /*
5020 * Initialize it.
5021 */
5022 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5023 pPage->enmKind = enmKind;
5024 pPage->enmAccess = enmAccess;
5025 pPage->GCPhys = GCPhys;
5026 pPage->fA20Enabled = fA20Enabled;
5027 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5028 pPage->fMonitored = false;
5029 pPage->fCached = false;
5030 pPage->fDirty = false;
5031 pPage->fReusedFlushPending = false;
5032 pPage->cModifications = 0;
5033 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5034 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5035 pPage->cPresent = 0;
5036 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5037 pPage->idxDirtyEntry = 0;
5038 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5039 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5040 pPage->cLastAccessHandler = 0;
5041 pPage->cLocked = 0;
5042# ifdef VBOX_STRICT
5043 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5044# endif
5045
5046 /*
5047 * Insert into the tracking and cache. If this fails, free the page.
5048 */
5049 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5050 if (RT_FAILURE(rc3))
5051 {
5052 pPool->cUsedPages--;
5053 pPage->enmKind = PGMPOOLKIND_FREE;
5054 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5055 pPage->GCPhys = NIL_RTGCPHYS;
5056 pPage->iNext = pPool->iFreeHead;
5057 pPool->iFreeHead = pPage->idx;
5058 PGM_UNLOCK(pVM);
5059 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5060 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5061 return rc3;
5062 }
5063
5064 /*
5065 * Commit the allocation, clear the page and return.
5066 */
5067#ifdef VBOX_WITH_STATISTICS
5068 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5069 pPool->cUsedPagesHigh = pPool->cUsedPages;
5070#endif
5071
5072 if (!pPage->fZeroed)
5073 {
5074 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5075 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5076 ASMMemZeroPage(pv);
5077 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5078 }
5079
5080 *ppPage = pPage;
5081 if (fLockPage)
5082 pgmPoolLockPage(pPool, pPage);
5083 PGM_UNLOCK(pVM);
5084 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5085 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5086 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5087 return rc;
5088}
5089
5090
5091/**
5092 * Frees a usage of a pool page.
5093 *
5094 * @param pVM The cross context VM structure.
5095 * @param HCPhys The HC physical address of the shadow page.
5096 * @param iUser The shadow page pool index of the user table.
5097 * NIL_PGMPOOL_IDX if root page.
5098 * @param iUserTable The index into the user table (shadowed). Ignored if
5099 * root page.
5100 */
5101void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5102{
5103 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5104 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5105 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5106}
5107
5108
5109/**
5110 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5111 *
5112 * @returns Pointer to the shadow page structure.
5113 * @param pPool The pool.
5114 * @param HCPhys The HC physical address of the shadow page.
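* @remarks Asserts fatally if no in-use page is found; see pgmPoolQueryPageForDbg
*          for a non-asserting lookup.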
5115 */
5116PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5117{
5118 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5119
5120 /*
5121 * Look up the page.
5122 */
5123 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5124
5125 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5126 return pPage;
5127}
5128
5129
5130/**
5131 * Internal worker for finding a page for debugging purposes, no assertions.
5132 *
5133 * @returns Pointer to the shadow page structure. NULL if not found.
5134 * @param pPool The pool.
5135 * @param HCPhys The HC physical address of the shadow page.
5136 */
5137PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5138{
5139 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5140 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5141}
5142
5143
5144/**
5145 * Internal worker for PGM_HCPHYS_2_PTR.
5146 *
5147 * @returns VBox status code.
5148 * @param pVM The cross context VM structure.
5149 * @param HCPhys The HC physical address of the shadow page.
5150 * @param ppv Where to return the address.
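* @remarks The page offset bits of HCPhys are carried over into the returned address.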
5151 */
5152int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5153{
5154 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5155 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5156 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5157 VERR_PGM_POOL_GET_PAGE_FAILED);
5158 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5159 return VINF_SUCCESS;
5160}
5161
5162#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5163
5164/**
5165 * Flushes the specified page if present.
5166 *
5167 * @param pVM The cross context VM structure.
5168 * @param GCPhys Guest physical address of the page to flush
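*
* @remarks Only page kinds that shadow (and monitor) guest tables are flushed here;
*          the purely physical kinds are ignored.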
5169 */
5170void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5171{
5172 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5173
5174 VM_ASSERT_EMT(pVM);
5175
5176 /*
5177 * Look up the GCPhys in the hash.
5178 */
5179 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5180 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5181 if (i == NIL_PGMPOOL_IDX)
5182 return;
5183
5184 do
5185 {
5186 PPGMPOOLPAGE pPage = &pPool->aPages[i];
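/* Unsigned arithmetic: only pages whose GCPhys lies within this guest page match. */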
5187 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5188 {
5189 switch (pPage->enmKind)
5190 {
5191 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5193 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5194 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5195 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5196 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5197 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5198 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5199 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5200 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5201 case PGMPOOLKIND_64BIT_PML4:
5202 case PGMPOOLKIND_32BIT_PD:
5203 case PGMPOOLKIND_PAE_PDPT:
5204 {
5205 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5206# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5207 if (pPage->fDirty)
5208 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5209 else
5210# endif
5211 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5212 Assert(!pgmPoolIsPageLocked(pPage));
5213 pgmPoolMonitorChainFlush(pPool, pPage);
5214 return;
5215 }
5216
5217 /* ignore, no monitoring. */
5218 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5219 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5220 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5222 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5223 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5224 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5225 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5226 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5227 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5228 case PGMPOOLKIND_ROOT_NESTED:
5229 case PGMPOOLKIND_PAE_PD_PHYS:
5230 case PGMPOOLKIND_PAE_PDPT_PHYS:
5231 case PGMPOOLKIND_32BIT_PD_PHYS:
5232 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5233 break;
5234
5235 default:
5236 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5237 }
5238 }
5239
5240 /* next */
5241 i = pPage->iNext;
5242 } while (i != NIL_PGMPOOL_IDX);
5243 return;
5244}
5245
5246
5247/**
5248 * Reset CPU on hot plugging.
5249 *
5250 * @param pVM The cross context VM structure.
5251 * @param pVCpu The cross context virtual CPU structure.
5252 */
5253void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5254{
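/* Drop the shadow mode references, re-establish them, and force a CR3 resync and TLB flush on this vCPU. */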
5255 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5256
5257 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5258 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5259 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5260}
5261
5262
5263/**
5264 * Flushes the entire cache.
5265 *
5266 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5267 * this and will execute this CR3 flush.
5268 *
5269 * @param pVM The cross context VM structure.
5270 */
5271void pgmR3PoolReset(PVM pVM)
5272{
5273 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5274
5275 PGM_LOCK_ASSERT_OWNER(pVM);
5276 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5277 LogFlow(("pgmR3PoolReset:\n"));
5278
5279 /*
5280 * If there are no pages in the pool, there is nothing to do.
5281 */
5282 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5283 {
5284 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5285 return;
5286 }
5287
5288 /*
5289 * Exit the shadow mode since we're going to clear everything,
5290 * including the root page.
5291 */
5292 VMCC_FOR_EACH_VMCPU(pVM)
5293 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5294 VMCC_FOR_EACH_VMCPU_END(pVM);
5295
5296
5297 /*
5298 * Nuke the free list and reinsert all pages into it.
5299 */
5300 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5301 {
5302 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5303
5304 if (pPage->fMonitored)
5305 pgmPoolMonitorFlush(pPool, pPage);
5306 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5307 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5308 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5309 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5310 pPage->GCPhys = NIL_RTGCPHYS;
5311 pPage->enmKind = PGMPOOLKIND_FREE;
5312 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5313 Assert(pPage->idx == i);
5314 pPage->iNext = i + 1;
5315 pPage->fA20Enabled = true;
5316 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5317 pPage->fSeenNonGlobal = false;
5318 pPage->fMonitored = false;
5319 pPage->fDirty = false;
5320 pPage->fCached = false;
5321 pPage->fReusedFlushPending = false;
5322 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5323 pPage->cPresent = 0;
5324 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5325 pPage->cModifications = 0;
5326 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5327 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5328 pPage->idxDirtyEntry = 0;
5329 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5330 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5331 pPage->cLastAccessHandler = 0;
5332 pPage->cLocked = 0;
5333# ifdef VBOX_STRICT
5334 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5335# endif
5336 }
5337 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5338 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5339 pPool->cUsedPages = 0;
5340
5341 /*
5342 * Zap and reinitialize the user records.
5343 */
5344 pPool->cPresent = 0;
5345 pPool->iUserFreeHead = 0;
5346 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5347 const unsigned cMaxUsers = pPool->cMaxUsers;
5348 for (unsigned i = 0; i < cMaxUsers; i++)
5349 {
5350 paUsers[i].iNext = i + 1;
5351 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5352 paUsers[i].iUserTable = 0xfffffffe;
5353 }
5354 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5355
5356 /*
5357 * Clear all the GCPhys links and rebuild the phys ext free list.
5358 */
5359 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5360 pRam;
5361 pRam = pRam->CTX_SUFF(pNext))
5362 {
5363 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5364 while (iPage-- > 0)
5365 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5366 }
5367
5368 pPool->iPhysExtFreeHead = 0;
5369 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5370 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5371 for (unsigned i = 0; i < cMaxPhysExts; i++)
5372 {
5373 paPhysExts[i].iNext = i + 1;
5374 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5375 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5376 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5377 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5378 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5379 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5380 }
5381 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5382
5383 /*
5384 * Just zap the modified list.
5385 */
5386 pPool->cModifiedPages = 0;
5387 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5388
5389 /*
5390 * Clear the GCPhys hash and the age list.
5391 */
5392 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5393 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5394 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5395 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5396
5397# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5398 /* Clear all dirty pages. */
5399 pPool->idxFreeDirtyPage = 0;
5400 pPool->cDirtyPages = 0;
5401 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5402 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5403# endif
5404
5405 /*
5406 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5407 */
5408 VMCC_FOR_EACH_VMCPU(pVM)
5409 {
5410 /*
5411 * Re-enter the shadowing mode and assert Sync CR3 FF.
5412 */
5413 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5414 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5415 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5416 }
5417 VMCC_FOR_EACH_VMCPU_END(pVM);
5418
5419 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5420}
5421
5422#endif /* IN_RING3 */
5423
5424#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5425/**
5426 * Stringifies a PGMPOOLKIND value.
5427 */
5428static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5429{
5430 switch ((PGMPOOLKIND)enmKind)
5431 {
5432 case PGMPOOLKIND_INVALID:
5433 return "PGMPOOLKIND_INVALID";
5434 case PGMPOOLKIND_FREE:
5435 return "PGMPOOLKIND_FREE";
5436 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5437 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5438 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5439 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5440 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5441 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5442 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5443 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5444 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5445 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5446 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5447 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5448 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5449 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5450 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5451 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5452 case PGMPOOLKIND_32BIT_PD:
5453 return "PGMPOOLKIND_32BIT_PD";
5454 case PGMPOOLKIND_32BIT_PD_PHYS:
5455 return "PGMPOOLKIND_32BIT_PD_PHYS";
5456 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5457 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5458 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5459 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5460 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5461 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5462 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5463 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5464 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5465 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5466 case PGMPOOLKIND_PAE_PD_PHYS:
5467 return "PGMPOOLKIND_PAE_PD_PHYS";
5468 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5469 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5470 case PGMPOOLKIND_PAE_PDPT:
5471 return "PGMPOOLKIND_PAE_PDPT";
5472 case PGMPOOLKIND_PAE_PDPT_PHYS:
5473 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5474 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5475 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5476 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5477 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5478 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5479 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5480 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5481 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5482 case PGMPOOLKIND_64BIT_PML4:
5483 return "PGMPOOLKIND_64BIT_PML4";
5484 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5485 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5486 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5487 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5488 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5489 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5490 case PGMPOOLKIND_ROOT_NESTED:
5491 return "PGMPOOLKIND_ROOT_NESTED";
5492 }
5493 return "Unknown kind!";
5494}
5495#endif /* LOG_ENABLED || VBOX_STRICT */
5496