VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 63369

Last change on this file since 63369 was 62606, checked in by vboxsync, 9 years ago

VMM: Unused parameters.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 214.7 KB
1/* $Id: PGMAllPool.cpp 62606 2016-07-27 16:33:40Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
48DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
49static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
54static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
55#endif
56#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
57static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
58#endif
59
60int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
61PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
62void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
63void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
64
65RT_C_DECLS_END
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88/**
89 * Flushes a chain of pages sharing the same access monitor.
90 *
91 * @returns VBox status code suitable for scheduling.
92 * @param pPool The pool.
93 * @param pPage A page in the chain.
94 * @todo VBOXSTRICTRC
95 */
96int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
97{
98 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
99
100 /*
101 * Find the list head.
102 */
103 uint16_t idx = pPage->idx;
104 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
105 {
106 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
107 {
108 idx = pPage->iMonitoredPrev;
109 Assert(idx != pPage->idx);
110 pPage = &pPool->aPages[idx];
111 }
112 }
113
114 /*
115 * Iterate the list flushing each shadow page.
116 */
117 int rc = VINF_SUCCESS;
118 for (;;)
119 {
120 idx = pPage->iMonitoredNext;
121 Assert(idx != pPage->idx);
122 if (pPage->idx >= PGMPOOL_IDX_FIRST)
123 {
124 int rc2 = pgmPoolFlushPage(pPool, pPage);
125 AssertRC(rc2);
126 }
127 /* next */
128 if (idx == NIL_PGMPOOL_IDX)
129 break;
130 pPage = &pPool->aPages[idx];
131 }
132 return rc;
133}
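/*
 * Side note: monitored pages that share the same guest physical page are chained
 * by pool index through iMonitoredPrev/iMonitoredNext (NIL_PGMPOOL_IDX terminated),
 * so the walk above is plain index chasing in pPool->aPages.  A minimal sketch of
 * the head lookup, using a hypothetical helper name:
 */
#if 0 /* illustrative sketch only, not built */
DECLINLINE(PPGMPOOLPAGE) pgmPoolMonitorGetChainHead(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /* Walk backwards until iMonitoredPrev is NIL; that page is the list head. */
    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        pPage = &pPool->aPages[pPage->iMonitoredPrev];
    return pPage;
}
#endif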
134
135
136/**
137 * Wrapper for reading the guest entry being modified, via the current context mapping or the guest physical address.
138 *
139 * @returns VBox status code suitable for scheduling.
140 * @param pVM The cross context VM structure.
141 * @param pvDst Destination address
142 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
143 * on the context (e.g. \#PF in R0 & RC).
144 * @param GCPhysSrc The source guest physical address.
145 * @param cb Size of data to read
146 */
147DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
148{
149#if defined(IN_RING3)
150 NOREF(pVM); NOREF(GCPhysSrc);
151 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
152 return VINF_SUCCESS;
153#else
154 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
155 NOREF(pvSrc);
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
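/*
 * The masking above assumes cb is a power of two and simply rounds the read down
 * to a cb-aligned boundary so that a whole entry is fetched.  Worked example,
 * assuming cb == sizeof(X86PTEPAE) == 8:
 *     GCPhysSrc = 0x00101234  ->  0x00101234 & ~(RTGCPHYS)7 = 0x00101230
 * i.e. the complete 8-byte PTE containing the written bytes is read.
 */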
159
160
161/**
162 * Process shadow entries before they are changed by the guest.
163 *
164 * For PT entries we will clear them. For PD entries, we'll simply check
165 * for mapping conflicts and set the SyncCR3 FF if found.
166 *
167 * @param pVCpu The cross context virtual CPU structure.
168 * @param pPool The pool.
169 * @param pPage The head page.
170 * @param GCPhysFault The guest physical fault address.
171 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
172 * depending on the context (e.g. \#PF in R0 & RC).
173 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
174 */
175static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
176 void const *pvAddress, unsigned cbWrite)
177{
178 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
179 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
180 PVM pVM = pPool->CTX_SUFF(pVM);
181 NOREF(pVCpu);
182
183 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
184 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
185
186 for (;;)
187 {
188 union
189 {
190 void *pv;
191 PX86PT pPT;
192 PPGMSHWPTPAE pPTPae;
193 PX86PD pPD;
194 PX86PDPAE pPDPae;
195 PX86PDPT pPDPT;
196 PX86PML4 pPML4;
197 } uShw;
198
199 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
200 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
201
202 uShw.pv = NULL;
203 switch (pPage->enmKind)
204 {
205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
206 {
207 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
208 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
209 const unsigned iShw = off / sizeof(X86PTE);
210 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
211 if (uShw.pPT->a[iShw].n.u1Present)
212 {
213 X86PTE GstPte;
214
215 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
216 AssertRC(rc);
217 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
218 pgmPoolTracDerefGCPhysHint(pPool, pPage,
219 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
220 GstPte.u & X86_PTE_PG_MASK,
221 iShw);
222 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
223 }
224 break;
225 }
226
227 /* page/2 sized */
228 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
229 {
230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
231 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
232 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
233 {
234 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
235 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
236 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
237 {
238 X86PTE GstPte;
239 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
240 AssertRC(rc);
241
242 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
243 pgmPoolTracDerefGCPhysHint(pPool, pPage,
244 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
245 GstPte.u & X86_PTE_PG_MASK,
246 iShw);
247 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
248 }
249 }
250 break;
251 }
252
253 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
254 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
255 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
256 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
257 {
258 unsigned iGst = off / sizeof(X86PDE);
259 unsigned iShwPdpt = iGst / 256;
260 unsigned iShw = (iGst % 256) * 2;
261 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
262
263 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
264 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
265 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
266 {
267 for (unsigned i = 0; i < 2; i++)
268 {
269# ifdef VBOX_WITH_RAW_MODE_NOT_R0
270 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
271 {
272 Assert(pgmMapAreMappingsEnabled(pVM));
273 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
274 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
275 break;
276 }
277# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
278 if (uShw.pPDPae->a[iShw+i].n.u1Present)
279 {
280 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
281 pgmPoolFree(pVM,
282 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
283 pPage->idx,
284 iShw + i);
285 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
286 }
287
288 /* paranoia / a bit assumptive. */
289 if ( (off & 3)
290 && (off & 3) + cbWrite > 4)
291 {
292 const unsigned iShw2 = iShw + 2 + i;
293 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
294 {
295# ifdef VBOX_WITH_RAW_MODE_NOT_R0
296 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
297 {
298 Assert(pgmMapAreMappingsEnabled(pVM));
299 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
300 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
301 break;
302 }
303# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
304 if (uShw.pPDPae->a[iShw2].n.u1Present)
305 {
306 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
307 pgmPoolFree(pVM,
308 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
309 pPage->idx,
310 iShw2);
311 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
312 }
313 }
314 }
315 }
316 }
317 break;
318 }
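/*
 * Index math above, illustrated: the 1024-entry 32-bit guest PD is shadowed by
 * four 512-entry PAE PDs, and each 4MB guest PDE maps to two 2MB PAE PDEs.
 * E.g. off = 0x804 gives iGst = 0x804 / 4 = 513, hence iShwPdpt = 513 / 256 = 2
 * (the third shadow PD) and iShw = (513 % 256) * 2 = 2.
 */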
319
320 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
321 {
322 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
323 const unsigned iShw = off / sizeof(X86PTEPAE);
324 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
325 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
326 {
327 X86PTEPAE GstPte;
328 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
329 AssertRC(rc);
330
331 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
332 pgmPoolTracDerefGCPhysHint(pPool, pPage,
333 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
334 GstPte.u & X86_PTE_PAE_PG_MASK,
335 iShw);
336 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
337 }
338
339 /* paranoia / a bit assumptive. */
340 if ( (off & 7)
341 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
342 {
343 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
344 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
345
346 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
347 {
348 X86PTEPAE GstPte;
349 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
350 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
351 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
352 AssertRC(rc);
353 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
354 pgmPoolTracDerefGCPhysHint(pPool, pPage,
355 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
356 GstPte.u & X86_PTE_PAE_PG_MASK,
357 iShw2);
358 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
359 }
360 }
361 break;
362 }
363
364 case PGMPOOLKIND_32BIT_PD:
365 {
366 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
367 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
368
369 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
370 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
371# ifdef VBOX_WITH_RAW_MODE_NOT_R0
372 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
373 {
374 Assert(pgmMapAreMappingsEnabled(pVM));
375 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
376 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
377 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
378 break;
379 }
380 else
381# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
382 {
383 if (uShw.pPD->a[iShw].n.u1Present)
384 {
385 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
386 pgmPoolFree(pVM,
387 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
388 pPage->idx,
389 iShw);
390 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
391 }
392 }
393 /* paranoia / a bit assumptive. */
394 if ( (off & 3)
395 && (off & 3) + cbWrite > sizeof(X86PTE))
396 {
397 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
398 if ( iShw2 != iShw
399 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
400 {
401# ifdef VBOX_WITH_RAW_MODE_NOT_R0
402 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
403 {
404 Assert(pgmMapAreMappingsEnabled(pVM));
405 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
406 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
407 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
408 break;
409 }
410# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
411 if (uShw.pPD->a[iShw2].n.u1Present)
412 {
413 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
414 pgmPoolFree(pVM,
415 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
416 pPage->idx,
417 iShw2);
418 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
419 }
420 }
421 }
422#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
423 if ( uShw.pPD->a[iShw].n.u1Present
424 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
425 {
426 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
427# ifdef IN_RC /* TLB load - we're pushing things a bit... */
428 ASMProbeReadByte(pvAddress);
429# endif
430 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
431 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
432 }
433#endif
434 break;
435 }
436
437 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
438 {
439 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
440 const unsigned iShw = off / sizeof(X86PDEPAE);
441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
442#ifdef VBOX_WITH_RAW_MODE_NOT_R0
443 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
444 {
445 Assert(pgmMapAreMappingsEnabled(pVM));
446 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
447 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
448 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
449 break;
450 }
451#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
452 /*
453 * Causes trouble when the guest uses a PDE to refer to the whole page table level
454 * structure. (Invalidate here; faults later on when it tries to change the page
455 * table entries -> recheck; probably only applies to the RC case.)
456 */
457#ifdef VBOX_WITH_RAW_MODE_NOT_R0
458 else
459#endif
460 {
461 if (uShw.pPDPae->a[iShw].n.u1Present)
462 {
463 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
464 pgmPoolFree(pVM,
465 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
466 pPage->idx,
467 iShw);
468 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
469 }
470 }
471 /* paranoia / a bit assumptive. */
472 if ( (off & 7)
473 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
474 {
475 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
476 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
477
478#ifdef VBOX_WITH_RAW_MODE_NOT_R0
479 if ( iShw2 != iShw
480 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
481 {
482 Assert(pgmMapAreMappingsEnabled(pVM));
483 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
484 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
485 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
486 break;
487 }
488 else
489#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
490 if (uShw.pPDPae->a[iShw2].n.u1Present)
491 {
492 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
493 pgmPoolFree(pVM,
494 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
495 pPage->idx,
496 iShw2);
497 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
498 }
499 }
500 break;
501 }
502
503 case PGMPOOLKIND_PAE_PDPT:
504 {
505 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
506 /*
507 * Hopefully this doesn't happen very often:
508 * - touching unused parts of the page
509 * - messing with the bits of pd pointers without changing the physical address
510 */
511 /* PDPT roots are not page aligned; 32 byte only! */
512 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
513
514 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
515 const unsigned iShw = offPdpt / sizeof(X86PDPE);
516 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
517 {
518# ifdef VBOX_WITH_RAW_MODE_NOT_R0
519 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
520 {
521 Assert(pgmMapAreMappingsEnabled(pVM));
522 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
523 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
524 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
525 break;
526 }
527 else
528# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
529 if (uShw.pPDPT->a[iShw].n.u1Present)
530 {
531 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
532 pgmPoolFree(pVM,
533 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
534 pPage->idx,
535 iShw);
536 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
537 }
538
539 /* paranoia / a bit assumptive. */
540 if ( (offPdpt & 7)
541 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
542 {
543 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
544 if ( iShw2 != iShw
545 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
546 {
547# ifdef VBOX_WITH_RAW_MODE_NOT_R0
548 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
549 {
550 Assert(pgmMapAreMappingsEnabled(pVM));
551 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
552 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
553 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
554 break;
555 }
556 else
557# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
558 if (uShw.pPDPT->a[iShw2].n.u1Present)
559 {
560 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
561 pgmPoolFree(pVM,
562 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
563 pPage->idx,
564 iShw2);
565 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
566 }
567 }
568 }
569 }
570 break;
571 }
572
573#ifndef IN_RC
574 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
575 {
576 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
577 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
578 const unsigned iShw = off / sizeof(X86PDEPAE);
579 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
580 if (uShw.pPDPae->a[iShw].n.u1Present)
581 {
582 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
583 pgmPoolFree(pVM,
584 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
585 pPage->idx,
586 iShw);
587 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
588 }
589 /* paranoia / a bit assumptive. */
590 if ( (off & 7)
591 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
592 {
593 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
594 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
595
596 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
597 if (uShw.pPDPae->a[iShw2].n.u1Present)
598 {
599 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
600 pgmPoolFree(pVM,
601 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
602 pPage->idx,
603 iShw2);
604 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
605 }
606 }
607 break;
608 }
609
610 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
611 {
612 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
613 /*
614 * Hopefully this doesn't happen very often:
615 * - messing with the bits of pd pointers without changing the physical address
616 */
617 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
618 const unsigned iShw = off / sizeof(X86PDPE);
619 if (uShw.pPDPT->a[iShw].n.u1Present)
620 {
621 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
622 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
623 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
624 }
625 /* paranoia / a bit assumptive. */
626 if ( (off & 7)
627 && (off & 7) + cbWrite > sizeof(X86PDPE))
628 {
629 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
630 if (uShw.pPDPT->a[iShw2].n.u1Present)
631 {
632 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
633 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
634 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
635 }
636 }
637 break;
638 }
639
640 case PGMPOOLKIND_64BIT_PML4:
641 {
642 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
643 /*
644 * Hopefully this doesn't happen very often:
645 * - messing with the bits of pd pointers without changing the physical address
646 */
647 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
648 const unsigned iShw = off / sizeof(X86PDPE);
649 if (uShw.pPML4->a[iShw].n.u1Present)
650 {
651 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
652 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
653 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
654 }
655 /* paranoia / a bit assumptive. */
656 if ( (off & 7)
657 && (off & 7) + cbWrite > sizeof(X86PDPE))
658 {
659 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
660 if (uShw.pPML4->a[iShw2].n.u1Present)
661 {
662 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
663 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
664 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
665 }
666 }
667 break;
668 }
669#endif /* !IN_RC */
670
671 default:
672 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
673 }
674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
675
676 /* next */
677 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
678 return;
679 pPage = &pPool->aPages[pPage->iMonitoredNext];
680 }
681}
682
683# ifndef IN_RING3
684
685/**
686 * Checks if an access could be a fork operation in progress.
687 *
688 * Meaning that the guest is setting up the parent process for Copy-On-Write.
689 *
690 * @returns true if it's likely that we're forking, otherwise false.
691 * @param pPool The pool.
692 * @param pDis The disassembled instruction.
693 * @param offFault The access offset.
694 */
695DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
696{
697 /*
698 * i386 linux is using btr to clear X86_PTE_RW.
699 * The functions involved are (2.6.16 source inspection):
700 * clear_bit
701 * ptep_set_wrprotect
702 * copy_one_pte
703 * copy_pte_range
704 * copy_pmd_range
705 * copy_pud_range
706 * copy_page_range
707 * dup_mmap
708 * dup_mm
709 * copy_mm
710 * copy_process
711 * do_fork
712 */
713 if ( pDis->pCurInstr->uOpcode == OP_BTR
714 && !(offFault & 4)
715 /** @todo Validate that the bit index is X86_PTE_RW. */
716 )
717 {
718 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork)); RT_NOREF_PV(pPool);
719 return true;
720 }
721 return false;
722}
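/*
 * For reference, the guest-side pattern this keys on (sketch, not VirtualBox code):
 * i386 Linux write-protects the parent's PTEs during fork by clearing _PAGE_RW
 * (bit 1) with a locked bit-test-and-reset, roughly
 *     lock btr $1, (%edx)      ; ptep_set_wrprotect()
 * so a faulting BTR on the low dword of a PTE is treated as "fork in progress"
 * and the whole monitored chain is flushed instead of being emulated entry by entry.
 */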
723
724
725/**
726 * Determines whether the page is likely to have been reused.
727 *
728 * @returns true if we consider the page as being reused for a different purpose.
729 * @returns false if we consider it to still be a paging page.
730 * @param pVM The cross context VM structure.
731 * @param pVCpu The cross context virtual CPU structure.
732 * @param pRegFrame Trap register frame.
733 * @param pDis The disassembly info for the faulting instruction.
734 * @param pvFault The fault address.
735 *
736 * @remark The REP prefix check is left to the caller because of STOSD/W.
737 */
738DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
739{
740#ifndef IN_RC
741 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
742 if ( HMHasPendingIrq(pVM)
743 && (pRegFrame->rsp - pvFault) < 32)
744 {
745 /* Fault caused by stack writes while trying to inject an interrupt event. */
746 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
747 return true;
748 }
749#else
750 NOREF(pVM); NOREF(pvFault);
751#endif
752
753 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
754
755 /* Non-supervisor mode write means it's used for something else. */
756 if (CPUMGetGuestCPL(pVCpu) == 3)
757 return true;
758
759 switch (pDis->pCurInstr->uOpcode)
760 {
761 /* call implies the actual push of the return address faulted */
762 case OP_CALL:
763 Log4(("pgmPoolMonitorIsReused: CALL\n"));
764 return true;
765 case OP_PUSH:
766 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
767 return true;
768 case OP_PUSHF:
769 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
770 return true;
771 case OP_PUSHA:
772 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
773 return true;
774 case OP_FXSAVE:
775 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
776 return true;
777 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
778 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
779 return true;
780 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
781 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
782 return true;
783 case OP_MOVSWD:
784 case OP_STOSWD:
785 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
786 && pRegFrame->rcx >= 0x40
787 )
788 {
789 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
790
791 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
792 return true;
793 }
794 break;
795
796 default:
797 /*
798 * Anything having ESP on the left side means stack writes.
799 */
800 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
801 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
802 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
803 {
804 Log4(("pgmPoolMonitorIsReused: ESP\n"));
805 return true;
806 }
807 break;
808 }
809
810 /*
811 * Page table updates are very unlikely to cross page boundaries,
812 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
813 */
814 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
815 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
816 {
817 Log4(("pgmPoolMonitorIsReused: cross page write\n"));
818 return true;
819 }
820
821 /*
822 * Nobody does an unaligned 8-byte write to a page table, right?
823 */
824 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
825 {
826 Log4(("pgmPoolMonitorIsReused: Unaligned 8+ byte write\n"));
827 return true;
828 }
829
830 return false;
831}
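/*
 * Worked example for the cross-page test above: cbWrite = 8 at pvFault = 0x...ffc
 * gives (0xffc + 8) >> X86_PAGE_SHIFT == 1 while 0xffc >> X86_PAGE_SHIFT == 0, so
 * the access straddles a page boundary and the page is assumed to be reused rather
 * than still acting as a page table.
 */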
832
833
834/**
835 * Flushes the page being accessed.
836 *
837 * @returns VBox status code suitable for scheduling.
838 * @param pVM The cross context VM structure.
839 * @param pVCpu The cross context virtual CPU structure.
840 * @param pPool The pool.
841 * @param pPage The pool page (head).
842 * @param pDis The disassembly of the write instruction.
843 * @param pRegFrame The trap register frame.
844 * @param GCPhysFault The fault address as guest physical address.
845 * @param pvFault The fault address.
846 * @todo VBOXSTRICTRC
847 */
848static int pgmPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
849 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
850{
851 NOREF(pVM); NOREF(GCPhysFault);
852
853 /*
854 * First, do the flushing.
855 */
856 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
857
858 /*
859 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
860 * Must do this in raw mode (!); XP boot will fail otherwise.
861 */
862 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
863 if (rc2 == VINF_SUCCESS)
864 { /* do nothing */ }
865 else if (rc2 == VINF_EM_RESCHEDULE)
866 {
867 if (rc == VINF_SUCCESS)
868 rc = VBOXSTRICTRC_VAL(rc2);
869#ifndef IN_RING3
870 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
871#endif
872 }
873 else if (rc2 == VERR_EM_INTERPRETER)
874 {
875#ifdef IN_RC
876 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
877 {
878 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
879 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
880 rc = VINF_SUCCESS;
881 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
882 }
883 else
884#endif
885 {
886 rc = VINF_EM_RAW_EMULATE_INSTR;
887 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
888 }
889 }
890 else if (RT_FAILURE_NP(rc2))
891 rc = VBOXSTRICTRC_VAL(rc2);
892 else
893 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
894
895 LogFlow(("pgmPoolAccessPfHandlerPT: returns %Rrc (flushed)\n", rc));
896 return rc;
897}
898
899
900/**
901 * Handles the STOSD write accesses.
902 *
903 * @returns VBox status code suitable for scheduling.
904 * @param pVM The cross context VM structure.
905 * @param pPool The pool.
906 * @param pPage The pool page (head).
907 * @param pDis The disassembly of the write instruction.
908 * @param pRegFrame The trap register frame.
909 * @param GCPhysFault The fault address as guest physical address.
910 * @param pvFault The fault address.
911 */
912DECLINLINE(int) pgmPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
913 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
914{
915 unsigned uIncrement = pDis->Param1.cb;
916 NOREF(pVM);
917
918 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
919 Assert(pRegFrame->rcx <= 0x20);
920
921#ifdef VBOX_STRICT
922 if (pDis->uOpMode == DISCPUMODE_32BIT)
923 Assert(uIncrement == 4);
924 else
925 Assert(uIncrement == 8);
926#endif
927
928 Log3(("pgmPoolAccessPfHandlerSTOSD\n"));
929
930 /*
931 * Increment the modification counter and insert it into the list
932 * of modified pages the first time.
933 */
934 if (!pPage->cModifications++)
935 pgmPoolMonitorModifiedInsert(pPool, pPage);
936
937 /*
938 * Execute REP STOSD.
939 *
940 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
941 * write situation, meaning that it's safe to write here.
942 */
943 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
944 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
945 while (pRegFrame->rcx)
946 {
947#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
948 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
949 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
950 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
951#else
952 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
953#endif
954#ifdef IN_RC
955 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
956#else
957 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
958#endif
959 pu32 += uIncrement;
960 GCPhysFault += uIncrement;
961 pRegFrame->rdi += uIncrement;
962 pRegFrame->rcx--;
963 }
964 pRegFrame->rip += pDis->cbInstr;
965
966 LogFlow(("pgmPoolAccessPfHandlerSTOSD: returns\n"));
967 return VINF_SUCCESS;
968}
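/*
 * Worked example of the loop above (32-bit guest, REP STOSD, ecx = 4, uIncrement = 4):
 * four iterations, each notifying pgmPoolMonitorChainChanging() of a 4-byte change
 * and storing the low dword of rax at GCPhysFault, with GCPhysFault/rdi advancing by
 * 4 and rcx counting down; finally rip is advanced past the instruction so the guest
 * does not re-fault on it.
 */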
969
970
971/**
972 * Handles the simple write accesses.
973 *
974 * @returns VBox status code suitable for scheduling.
975 * @param pVM The cross context VM structure.
976 * @param pVCpu The cross context virtual CPU structure.
977 * @param pPool The pool.
978 * @param pPage The pool page (head).
979 * @param pDis The disassembly of the write instruction.
980 * @param pRegFrame The trap register frame.
981 * @param GCPhysFault The fault address as guest physical address.
982 * @param pvFault The fault address.
983 * @param pfReused Reused state (in/out)
984 */
985DECLINLINE(int) pgmPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
986 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
987{
988 Log3(("pgmPoolAccessPfHandlerSimple\n"));
989 NOREF(pVM);
990 NOREF(pfReused); /* initialized by caller */
991
992 /*
993 * Increment the modification counter and insert it into the list
994 * of modified pages the first time.
995 */
996 if (!pPage->cModifications++)
997 pgmPoolMonitorModifiedInsert(pPool, pPage);
998
999 /*
1000 * Clear all the pages. ASSUMES that pvFault is readable.
1001 */
1002#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1003 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1004#endif
1005
1006 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1007 if (cbWrite <= 8)
1008 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1009 else if (cbWrite <= 16)
1010 {
1011 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1012 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1013 }
1014 else
1015 {
1016 Assert(cbWrite <= 32);
1017 for (uint32_t off = 0; off < cbWrite; off += 8)
1018 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1019 }
1020
1021#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1022 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1023#endif
1024
1025 /*
1026 * Interpret the instruction.
1027 */
1028 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1029 if (RT_SUCCESS(rc))
1030 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1031 else if (rc == VERR_EM_INTERPRETER)
1032 {
1033 LogFlow(("pgmPoolAccessPfHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1034 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1035 rc = VINF_EM_RAW_EMULATE_INSTR;
1036 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1037 }
1038
1039#if 0 /* experimental code */
1040 if (rc == VINF_SUCCESS)
1041 {
1042 switch (pPage->enmKind)
1043 {
1044 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1045 {
1046 X86PTEPAE GstPte;
1047 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1048 AssertRC(rc);
1049
1050 /* Check the new value written by the guest. If present and with a bogus physical address, then
1051 * it's fairly safe to assume the guest is reusing the PT.
1052 */
1053 if (GstPte.n.u1Present)
1054 {
1055 RTHCPHYS HCPhys = -1;
1056 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1057 if (rc != VINF_SUCCESS)
1058 {
1059 *pfReused = true;
1060 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1061 }
1062 }
1063 break;
1064 }
1065 }
1066 }
1067#endif
1068
1069 LogFlow(("pgmPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1070 return VBOXSTRICTRC_VAL(rc);
1071}
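/*
 * The chunking above keeps every pgmPoolMonitorChainChanging() call within one shadow
 * entry's worth of data: e.g. a 16-byte store is reported as two 8-byte changes and a
 * 32-byte store as four, so the monitor code only has to handle a single entry (plus
 * the adjacent-entry "paranoia" checks) per call.
 */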
1072
1073
1074/**
1075 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1076 * \#PF access handler callback for page table pages.}
1077 *
1078 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1079 */
1080DECLEXPORT(VBOXSTRICTRC) pgmPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1081 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1082{
1083 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1084 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1085 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1086 unsigned cMaxModifications;
1087 bool fForcedFlush = false;
1088 NOREF(uErrorCode);
1089
1090 LogFlow(("pgmPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1091
1092 pgmLock(pVM);
1093 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1094 {
1095 /* Pool page changed while we were waiting for the lock; ignore. */
1096 Log(("CPU%d: pgmPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1097 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1098 pgmUnlock(pVM);
1099 return VINF_SUCCESS;
1100 }
1101#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1102 if (pPage->fDirty)
1103 {
1104 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1105 pgmUnlock(pVM);
1106 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1107 }
1108#endif
1109
1110#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1111 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1112 {
1113 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1114 void *pvGst;
1115 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1116 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1117 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1118 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1119 }
1120#endif
1121
1122 /*
1123 * Disassemble the faulting instruction.
1124 */
1125 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1126 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1127 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1128 {
1129 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1130 pgmUnlock(pVM);
1131 return rc;
1132 }
1133
1134 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1135
1136 /*
1137 * We should ALWAYS have the list head as user parameter. This
1138 * is because we use that page to record the changes.
1139 */
1140 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1141
1142#ifdef IN_RING0
1143 /* Maximum nr of modifications depends on the page type. */
1144 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1145 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1146 cMaxModifications = 4;
1147 else
1148 cMaxModifications = 24;
1149#else
1150 cMaxModifications = 48;
1151#endif
1152
1153 /*
1154 * Incremental page table updates should weigh more than random ones.
1155 * (Only applies when started from offset 0)
1156 */
1157 pVCpu->pgm.s.cPoolAccessHandler++;
1158 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1159 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1160 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1161 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1162 {
1163 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1164 Assert(pPage->cModifications < 32000);
1165 pPage->cModifications = pPage->cModifications * 2;
1166 pPage->GCPtrLastAccessHandlerFault = pvFault;
1167 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1168 if (pPage->cModifications >= cMaxModifications)
1169 {
1170 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1171 fForcedFlush = true;
1172 }
1173 }
1174
1175 if (pPage->cModifications >= cMaxModifications)
1176 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1177
1178 /*
1179 * Check if it's worth dealing with.
1180 */
1181 bool fReused = false;
1182 bool fNotReusedNotForking = false;
1183 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1184 || pgmPoolIsPageLocked(pPage)
1185 )
1186 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1187 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1188 {
1189 /*
1190 * Simple instructions, no REP prefix.
1191 */
1192 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1193 {
1194 rc = pgmPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1195 if (fReused)
1196 goto flushPage;
1197
1198 /* A mov instruction to change the first page table entry will be remembered so we can detect
1199 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1200 */
1201 if ( rc == VINF_SUCCESS
1202 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1203 && pDis->pCurInstr->uOpcode == OP_MOV
1204 && (pvFault & PAGE_OFFSET_MASK) == 0)
1205 {
1206 pPage->GCPtrLastAccessHandlerFault = pvFault;
1207 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1208 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1209 /* Make sure we don't kick out a page too quickly. */
1210 if (pPage->cModifications > 8)
1211 pPage->cModifications = 2;
1212 }
1213 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1214 {
1215 /* ignore the 2nd write to this page table entry. */
1216 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1217 }
1218 else
1219 {
1220 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1221 pPage->GCPtrLastAccessHandlerRip = 0;
1222 }
1223
1224 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1225 pgmUnlock(pVM);
1226 return rc;
1227 }
1228
1229 /*
1230 * Windows frequently does small memset() operations (netio test 4k+).
1231 * We have to deal with these or we'll kill the cache and performance.
1232 */
1233 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1234 && !pRegFrame->eflags.Bits.u1DF
1235 && pDis->uOpMode == pDis->uCpuMode
1236 && pDis->uAddrMode == pDis->uCpuMode)
1237 {
1238 bool fValidStosd = false;
1239
1240 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1241 && pDis->fPrefix == DISPREFIX_REP
1242 && pRegFrame->ecx <= 0x20
1243 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1244 && !((uintptr_t)pvFault & 3)
1245 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1246 )
1247 {
1248 fValidStosd = true;
1249 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1250 }
1251 else
1252 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1253 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1254 && pRegFrame->rcx <= 0x20
1255 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1256 && !((uintptr_t)pvFault & 7)
1257 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1258 )
1259 {
1260 fValidStosd = true;
1261 }
1262
1263 if (fValidStosd)
1264 {
1265 rc = pgmPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1266 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1267 pgmUnlock(pVM);
1268 return rc;
1269 }
1270 }
1271
1272 /* REP prefix, don't bother. */
1273 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1274 Log4(("pgmPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1275 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1276 fNotReusedNotForking = true;
1277 }
1278
1279#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1280 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1281 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1282 */
1283 if ( pPage->cModifications >= cMaxModifications
1284 && !fForcedFlush
1285 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1286 && ( fNotReusedNotForking
1287 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1288 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1289 )
1290 )
1291 {
1292 Assert(!pgmPoolIsPageLocked(pPage));
1293 Assert(pPage->fDirty == false);
1294
1295 /* Flush any monitored duplicates as we will disable write protection. */
1296 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1297 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1298 {
1299 PPGMPOOLPAGE pPageHead = pPage;
1300
1301 /* Find the monitor head. */
1302 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1303 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1304
1305 while (pPageHead)
1306 {
1307 unsigned idxNext = pPageHead->iMonitoredNext;
1308
1309 if (pPageHead != pPage)
1310 {
1311 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1312 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1313 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1314 AssertRC(rc2);
1315 }
1316
1317 if (idxNext == NIL_PGMPOOL_IDX)
1318 break;
1319
1320 pPageHead = &pPool->aPages[idxNext];
1321 }
1322 }
1323
1324 /* The flushing above might fail for locked pages, so double check. */
1325 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1326 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1327 {
1328 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1329
1330 /* Temporarily allow write access to the page table again. */
1331 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1332 if (rc == VINF_SUCCESS)
1333 {
1334 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1335 AssertMsg(rc == VINF_SUCCESS
1336 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1337 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1338 || rc == VERR_PAGE_NOT_PRESENT,
1339 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1340# ifdef VBOX_STRICT
1341 pPage->GCPtrDirtyFault = pvFault;
1342# endif
1343
1344 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1345 pgmUnlock(pVM);
1346 return rc;
1347 }
1348 }
1349 }
1350#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1351
1352 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1353flushPage:
1354 /*
1355 * Not worth it, so flush it.
1356 *
1357 * If we considered it to be reused, don't go back to ring-3
1358 * to emulate failed instructions since we usually cannot
1359 * interpret them. This may be a bit risky, in which case
1360 * the reuse detection must be fixed.
1361 */
1362 rc = pgmPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1363 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1364 && fReused)
1365 {
1366 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1367 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1368 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1369 }
1370 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1371 pgmUnlock(pVM);
1372 return rc;
1373}
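/*
 * Note on the heuristic above: a write that follows directly on the previous fault
 * (nearby RIP, pvFault == last fault + operand size, consecutive access handler
 * invocations) looks like a sequential rewrite of the whole table, so cModifications
 * is doubled instead of merely incremented; starting from 1 it goes 2, 4, 8, ... and
 * reaches the ring-0 PAE PT limit of 4 within a couple of faults, forcing the flush /
 * dirty-page path well before all 512 entries have trapped one by one.
 */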
1374
1375# endif /* !IN_RING3 */
1376
1377/**
1378 * @callback_method_impl{FNPGMPHYSHANDLER,
1379 * Access handler for shadowed page table pages.}
1380 *
1381 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1382 */
1383PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1384pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1385 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1386{
1387 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1388 STAM_PROFILE_START(&pPool->StatMonitorR3, a);
1389 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1390 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1391 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1392
1393 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1394
1395 /*
1396 * Make sure the pool page wasn't modified by a different CPU.
1397 */
1398 pgmLock(pVM);
1399 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1400 {
1401 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1402
1403 /* The max modification count before flushing depends on the context and page type. */
1404#ifdef IN_RING3
1405 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1406#else
1407 uint16_t cMaxModifications;
1408 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1409 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1410 cMaxModifications = 4;
1411 else
1412 cMaxModifications = 24;
1413# ifdef IN_RC
1414 cMaxModifications *= 2; /* traps are cheaper than exits. */
1415# endif
1416#endif
1417
1418 /*
1419 * We don't have to be very sophisticated about this since there are relatively few calls here.
1420 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1421 */
1422 if ( ( pPage->cModifications < cMaxModifications
1423 || pgmPoolIsPageLocked(pPage) )
1424 && enmOrigin != PGMACCESSORIGIN_DEVICE
1425 && cbBuf <= 16)
1426 {
1427 /* Clear the shadow entry. */
1428 if (!pPage->cModifications++)
1429 pgmPoolMonitorModifiedInsert(pPool, pPage);
1430
1431 if (cbBuf <= 8)
1432 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1433 else
1434 {
1435 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1436 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1437 }
1438 }
1439 else
1440 {
1441 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1442 pgmPoolMonitorChainFlush(pPool, pPage);
1443 }
1444
1445 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
1446 }
1447 else
1448 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1449 pgmUnlock(pVM);
1450 return VINF_PGM_HANDLER_DO_DEFAULT;
1451}
1452
1453
1454# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1455
1456# if defined(VBOX_STRICT) && !defined(IN_RING3)
1457
1458/**
1459 * Check references to guest physical memory in a PAE / PAE page table.
1460 *
1461 * @param pPool The pool.
1462 * @param pPage The page.
1463 * @param pShwPT The shadow page table (mapping of the page).
1464 * @param pGstPT The guest page table.
1465 */
1466static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1467{
1468 unsigned cErrors = 0;
1469 int LastRc = -1; /* initialized to shut up gcc */
1470 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1471 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1472 PVM pVM = pPool->CTX_SUFF(pVM);
1473
1474#ifdef VBOX_STRICT
1475 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1476 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1477#endif
1478 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1479 {
1480 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1481 {
1482 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1483 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1484 if ( rc != VINF_SUCCESS
1485 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1486 {
1487 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1488 LastPTE = i;
1489 LastRc = rc;
1490 LastHCPhys = HCPhys;
1491 cErrors++;
1492
1493 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1494 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1495 AssertRC(rc);
1496
1497 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1498 {
1499 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1500
1501 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1502 {
1503 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1504
1505 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1506 {
1507 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1508 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1509 {
1510 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1511 }
1512 }
1513
1514 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1515 }
1516 }
1517 }
1518 }
1519 }
1520 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1521}
1522
1523
1524/**
1525 * Check references to guest physical memory in a PAE / 32-bit page table.
1526 *
1527 * @param pPool The pool.
1528 * @param pPage The page.
1529 * @param pShwPT The shadow page table (mapping of the page).
1530 * @param pGstPT The guest page table.
1531 */
1532static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1533{
1534 unsigned cErrors = 0;
1535 int LastRc = -1; /* initialized to shut up gcc */
1536 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1537 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1538 PVM pVM = pPool->CTX_SUFF(pVM);
1539
1540#ifdef VBOX_STRICT
1541 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1542 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1543#endif
1544 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1545 {
1546 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1547 {
1548 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1549 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1550 if ( rc != VINF_SUCCESS
1551 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1552 {
1553 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1554 LastPTE = i;
1555 LastRc = rc;
1556 LastHCPhys = HCPhys;
1557 cErrors++;
1558
1559 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1560 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1561 AssertRC(rc);
1562
1563 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1564 {
1565 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1566
1567 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1568 {
1569 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1570
1571 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1572 {
1573 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1574 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1575 {
1576 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1577 }
1578 }
1579
1580 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1581 }
1582 }
1583 }
1584 }
1585 }
1586 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1587}
1588
1589# endif /* VBOX_STRICT && !IN_RING3 */
1590
1591/**
1592 * Clear references to guest physical memory in a PAE / PAE page table.
1593 *
1594 * @returns nr of changed PTEs
1595 * @param pPool The pool.
1596 * @param pPage The page.
1597 * @param pShwPT The shadow page table (mapping of the page).
1598 * @param pGstPT The guest page table.
1599 * @param pOldGstPT The old cached guest page table.
1600 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1601 * @param pfFlush Flush reused page table (out)
1602 */
1603DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1604 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1605{
1606 unsigned cChanged = 0;
1607
1608#ifdef VBOX_STRICT
1609 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1610 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1611#endif
1612 *pfFlush = false;
1613
1614 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1615 {
1616 /* Check the new value written by the guest. If present and with a bogus physical address, then
1617 * it's fairly safe to assume the guest is reusing the PT.
1618 */
1619 if ( fAllowRemoval
1620 && pGstPT->a[i].n.u1Present)
1621 {
1622 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1623 {
1624 *pfFlush = true;
1625 return ++cChanged;
1626 }
1627 }
1628 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1629 {
1630 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1631 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1632 {
1633#ifdef VBOX_STRICT
1634                    RTHCPHYS HCPhys = NIL_RTHCPHYS;
1635 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1636 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1637#endif
1638 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1639 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1640 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1641 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1642
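                /* The shadow PTE may legitimately be read-only while the guest entry is
                 * writable (writes are trapped, e.g. for dirty-bit tracking), hence the
                 * '<=' comparison of the RW bits below. */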
1643 if ( uHostAttr == uGuestAttr
1644 && fHostRW <= fGuestRW)
1645 continue;
1646 }
1647 cChanged++;
1648 /* Something was changed, so flush it. */
1649            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1650 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1651 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1652 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1653 }
1654 }
1655 return cChanged;
1656}
1657
1658
1659/**
1660 * Clear references to guest physical memory in a PAE / 32-bit page table.
1661 *
1662 * @returns nr of changed PTEs
1663 * @param pPool The pool.
1664 * @param pPage The page.
1665 * @param pShwPT The shadow page table (mapping of the page).
1666 * @param pGstPT The guest page table.
1667 * @param pOldGstPT The old cached guest page table.
1668 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1669 * @param pfFlush Flush reused page table (out)
1670 */
1671DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1672 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1673{
1674 unsigned cChanged = 0;
1675
1676#ifdef VBOX_STRICT
1677 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1678 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1679#endif
1680 *pfFlush = false;
1681
1682 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1683 {
1684 /* Check the new value written by the guest. If present and with a bogus physical address, then
1685 * it's fairly safe to assume the guest is reusing the PT.
1686 */
1687 if ( fAllowRemoval
1688 && pGstPT->a[i].n.u1Present)
1689 {
1690 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1691 {
1692 *pfFlush = true;
1693 return ++cChanged;
1694 }
1695 }
1696 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1697 {
1698 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1699 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1700 {
1701#ifdef VBOX_STRICT
1702                    RTHCPHYS HCPhys = NIL_RTHCPHYS;
1703 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1704 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1705#endif
1706 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1707 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1708 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1709 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1710
1711 if ( uHostAttr == uGuestAttr
1712 && fHostRW <= fGuestRW)
1713 continue;
1714 }
1715 cChanged++;
1716 /* Something was changed, so flush it. */
1717            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1718 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1719 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1720 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1721 }
1722 }
1723 return cChanged;
1724}
1725
1726
1727/**
1728 * Flush a dirty page
1729 *
1730 * @param pVM The cross context VM structure.
1731 * @param pPool The pool.
1732 * @param idxSlot Dirty array slot index
1733 * @param fAllowRemoval Allow a reused page table to be removed
1734 */
1735static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1736{
1737 PPGMPOOLPAGE pPage;
1738 unsigned idxPage;
1739
1740 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1741 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1742 return;
1743
1744 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1745 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1746 pPage = &pPool->aPages[idxPage];
1747 Assert(pPage->idx == idxPage);
1748 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1749
1750 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1751 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1752
1753#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1754 PVMCPU pVCpu = VMMGetCpu(pVM);
1755 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1756#endif
1757
1758 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1759 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1760 Assert(rc == VINF_SUCCESS);
1761 pPage->fDirty = false;
1762
1763#ifdef VBOX_STRICT
1764 uint64_t fFlags = 0;
1765 RTHCPHYS HCPhys;
1766 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1767 AssertMsg( ( rc == VINF_SUCCESS
1768 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1769 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1770 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1771 || rc == VERR_PAGE_NOT_PRESENT,
1772 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1773#endif
1774
1775 /* Flush those PTEs that have changed. */
1776 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1777 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1778 void *pvGst;
1779 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1780 bool fFlush;
1781 unsigned cChanges;
1782
1783 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1784 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1785 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1786 else
1787 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1788 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1789
1790 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1791 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1792 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1793 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1794
1795 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1796 Assert(pPage->cModifications);
1797 if (cChanges < 4)
1798 pPage->cModifications = 1; /* must use > 0 here */
1799 else
1800 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1801
1802 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
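    /* If the dirty-page array was full, the slot we have just freed becomes the
     * obvious hint for the next allocation. */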
1803 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1804 pPool->idxFreeDirtyPage = idxSlot;
1805
1806 pPool->cDirtyPages--;
1807 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1808 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1809 if (fFlush)
1810 {
1811 Assert(fAllowRemoval);
1812 Log(("Flush reused page table!\n"));
1813 pgmPoolFlushPage(pPool, pPage);
1814 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1815 }
1816 else
1817 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1818
1819#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1820 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1821#endif
1822}
1823
1824
1825# ifndef IN_RING3
1826/**
1827 * Add a new dirty page
1828 *
1829 * @param pVM The cross context VM structure.
1830 * @param pPool The pool.
1831 * @param pPage The page.
1832 */
1833void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1834{
1835 unsigned idxFree;
1836
1837 PGM_LOCK_ASSERT_OWNER(pVM);
1838 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1839 Assert(!pPage->fDirty);
1840
1841 idxFree = pPool->idxFreeDirtyPage;
1842 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1843 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1844
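    /* If the dirty-page array is full, flush the entry occupying the slot we are
     * about to reuse (a reused page table may even be removed entirely here). */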
1845 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1846 {
1847 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1848 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1849 }
1850 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1851 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1852
1853 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1854
1855 /*
1856 * Make a copy of the guest page table as we require valid GCPhys addresses
1857 * when removing references to physical pages.
1858 * (The HCPhys linear lookup is *extremely* expensive!)
1859 */
1860 void *pvGst;
1861 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1862 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1863# ifdef VBOX_STRICT
1864 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1865 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1866 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1867 else
1868 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1869 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1870# endif
1871 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1872
1873 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1874 pPage->fDirty = true;
1875 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1876 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1877 pPool->cDirtyPages++;
1878
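    /*
     * Advance the free-slot hint. If the array still has room but the next slot is
     * already occupied, scan forward for the first free slot.
     */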
1879 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1880 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1881 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1882 {
1883 unsigned i;
1884 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1885 {
1886 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1887 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1888 {
1889 pPool->idxFreeDirtyPage = idxFree;
1890 break;
1891 }
1892 }
1893 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1894 }
1895
1896 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1897
1898 /*
1899 * Clear all references to this shadow table. See @bugref{7298}.
1900 */
1901 pgmPoolTrackClearPageUsers(pPool, pPage);
1902}
1903# endif /* !IN_RING3 */
1904
1905
1906/**
1907 * Check if the specified page is dirty (not write monitored)
1908 *
1909 * @returns true if dirty, false if not.
1910 * @param pVM The cross context VM structure.
1911 * @param GCPhys Guest physical address
1912 */
1913bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1914{
1915 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1916 PGM_LOCK_ASSERT_OWNER(pVM);
1917 if (!pPool->cDirtyPages)
1918 return false;
1919
1920 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1921
1922 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1923 {
1924 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1925 {
1926 PPGMPOOLPAGE pPage;
1927 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1928
1929 pPage = &pPool->aPages[idxPage];
1930 if (pPage->GCPhys == GCPhys)
1931 return true;
1932 }
1933 }
1934 return false;
1935}
1936
1937
1938/**
1939 * Reset all dirty pages by reinstating page monitoring.
1940 *
1941 * @param pVM The cross context VM structure.
1942 */
1943void pgmPoolResetDirtyPages(PVM pVM)
1944{
1945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1946 PGM_LOCK_ASSERT_OWNER(pVM);
1947 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1948
1949 if (!pPool->cDirtyPages)
1950 return;
1951
1952 Log(("pgmPoolResetDirtyPages\n"));
1953 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1954 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1955
1956 pPool->idxFreeDirtyPage = 0;
1957 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1958 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1959 {
1960 unsigned i;
1961 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1962 {
1963 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1964 {
1965 pPool->idxFreeDirtyPage = i;
1966 break;
1967 }
1968 }
1969 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1970 }
1971
1972 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1973 return;
1974}
1975
1976
1977/**
1978 * Invalidate the PT entry for the specified page
1979 *
1980 * @param pVM The cross context VM structure.
1981 * @param GCPtrPage Guest page to invalidate
1982 */
1983void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1984{
1985 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1986 PGM_LOCK_ASSERT_OWNER(pVM);
1987 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1988
1989 if (!pPool->cDirtyPages)
1990 return;
1991
1992 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1993 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1994 {
1995 }
1996}
1997
1998
1999/**
2000 * Flush the dirty page entry for the specified page table (if any), reinstating write monitoring.
2001 *
2002 * @param pVM The cross context VM structure.
2003 * @param GCPhysPT Physical address of the page table
2004 */
2005void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2006{
2007 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2008 PGM_LOCK_ASSERT_OWNER(pVM);
2009 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2010 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2011
2012 if (!pPool->cDirtyPages)
2013 return;
2014
2015 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2016
2017 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2018 {
2019 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2020 {
2021 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2022
2023 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2024 if (pPage->GCPhys == GCPhysPT)
2025 {
2026 idxDirtyPage = i;
2027 break;
2028 }
2029 }
2030 }
2031
2032 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2033 {
2034 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2035 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2036 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2037 {
2038 unsigned i;
2039 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2040 {
2041 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2042 {
2043 pPool->idxFreeDirtyPage = i;
2044 break;
2045 }
2046 }
2047 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2048 }
2049 }
2050}
2051
2052# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2053
2054/**
2055 * Inserts a page into the GCPhys hash table.
2056 *
2057 * @param pPool The pool.
2058 * @param pPage The page.
2059 */
2060DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2061{
2062 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2063 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2064 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2065 pPage->iNext = pPool->aiHash[iHash];
2066 pPool->aiHash[iHash] = pPage->idx;
2067}
2068
2069
2070/**
2071 * Removes a page from the GCPhys hash table.
2072 *
2073 * @param pPool The pool.
2074 * @param pPage The page.
2075 */
2076DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2077{
2078 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2079 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2080 if (pPool->aiHash[iHash] == pPage->idx)
2081 pPool->aiHash[iHash] = pPage->iNext;
2082 else
2083 {
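        /* Not the chain head: walk the hash chain to find the predecessor and unlink. */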
2084 uint16_t iPrev = pPool->aiHash[iHash];
2085 for (;;)
2086 {
2087 const int16_t i = pPool->aPages[iPrev].iNext;
2088 if (i == pPage->idx)
2089 {
2090 pPool->aPages[iPrev].iNext = pPage->iNext;
2091 break;
2092 }
2093 if (i == NIL_PGMPOOL_IDX)
2094 {
2095 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2096 break;
2097 }
2098 iPrev = i;
2099 }
2100 }
2101 pPage->iNext = NIL_PGMPOOL_IDX;
2102}
2103
2104
2105/**
2106 * Frees up one cache page.
2107 *
2108 * @returns VBox status code.
2109 * @retval VINF_SUCCESS on success.
2110 * @param pPool The pool.
2111 * @param iUser The user index.
2112 */
2113static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2114{
2115#ifndef IN_RC
2116 const PVM pVM = pPool->CTX_SUFF(pVM);
2117#endif
2118 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2119 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2120
2121 /*
2122 * Select one page from the tail of the age list.
2123 */
2124 PPGMPOOLPAGE pPage;
2125 for (unsigned iLoop = 0; ; iLoop++)
2126 {
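        /* Start with the least recently used page (the tail of the age list); if that
         * is the caller's own page, take its predecessor in the age list instead. */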
2127 uint16_t iToFree = pPool->iAgeTail;
2128 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2129 iToFree = pPool->aPages[iToFree].iAgePrev;
2130/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2131 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2132 {
2133 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2134 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2135 {
2136 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2137 continue;
2138 iToFree = i;
2139 break;
2140 }
2141 }
2142*/
2143 Assert(iToFree != iUser);
2144 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2145 pPage = &pPool->aPages[iToFree];
2146
2147 /*
2148 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2149 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2150 */
2151 if ( !pgmPoolIsPageLocked(pPage)
2152 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2153 break;
2154 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2155 pgmPoolCacheUsed(pPool, pPage);
2156 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2157 }
2158
2159 /*
2160 * Found a usable page, flush it and return.
2161 */
2162 int rc = pgmPoolFlushPage(pPool, pPage);
2163 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2164    /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2165 if (rc == VINF_SUCCESS)
2166 PGM_INVL_ALL_VCPU_TLBS(pVM);
2167 return rc;
2168}
2169
2170
2171/**
2172 * Checks if a kind mismatch is really a page being reused
2173 * or if it's just a normal remapping.
2174 *
2175 * @returns true if reused and the cached page (enmKind1) should be flushed
2176 * @returns false if not reused.
2177 * @param enmKind1 The kind of the cached page.
2178 * @param enmKind2 The kind of the requested page.
2179 */
2180static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2181{
2182 switch (enmKind1)
2183 {
2184 /*
2185 * Never reuse them. There is no remapping in non-paging mode.
2186 */
2187 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2188 case PGMPOOLKIND_32BIT_PD_PHYS:
2189 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2190 case PGMPOOLKIND_PAE_PD_PHYS:
2191 case PGMPOOLKIND_PAE_PDPT_PHYS:
2192 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2193 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2194 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2195 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2196 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2197 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2198 return false;
2199
2200 /*
2201 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2202 */
2203 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2204 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2206 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2207 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2208 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2209 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2210 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2211 case PGMPOOLKIND_32BIT_PD:
2212 case PGMPOOLKIND_PAE_PDPT:
2213 switch (enmKind2)
2214 {
2215 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2216 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2217 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2218 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2219 case PGMPOOLKIND_64BIT_PML4:
2220 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2221 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2222 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2224 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2227 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2228 return true;
2229 default:
2230 return false;
2231 }
2232
2233 /*
2234 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2235 */
2236 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2237 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2238 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2239 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2240 case PGMPOOLKIND_64BIT_PML4:
2241 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2242 switch (enmKind2)
2243 {
2244 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2245 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2246 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2247 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2252 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2253 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2254 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2255 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2256 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2257 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2258 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2259 return true;
2260 default:
2261 return false;
2262 }
2263
2264 /*
2265 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2266 */
2267 case PGMPOOLKIND_ROOT_NESTED:
2268 return false;
2269
2270 default:
2271 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2272 }
2273}
2274
2275
2276/**
2277 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2278 *
2279 * @returns VBox status code.
2280 * @retval VINF_PGM_CACHED_PAGE on success.
2281 * @retval VERR_FILE_NOT_FOUND if not found.
2282 * @param pPool The pool.
2283 * @param GCPhys The GC physical address of the page we're gonna shadow.
2284 * @param enmKind The kind of mapping.
2285 * @param enmAccess Access type for the mapping (only relevant for big pages)
2286 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2287 * @param iUser The shadow page pool index of the user table. This is
2288 * NIL_PGMPOOL_IDX for root pages.
2289 * @param iUserTable The index into the user table (shadowed). Ignored if
2290 * root page
2291 * @param ppPage Where to store the pointer to the page.
2292 */
2293static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2294 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2295{
2296 /*
2297 * Look up the GCPhys in the hash.
2298 */
2299 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2300 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2301 if (i != NIL_PGMPOOL_IDX)
2302 {
2303 do
2304 {
2305 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2306 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2307 if (pPage->GCPhys == GCPhys)
2308 {
2309 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2310 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2311 && pPage->fA20Enabled == fA20Enabled)
2312 {
2313 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2314 * doesn't flush it in case there are no more free use records.
2315 */
2316 pgmPoolCacheUsed(pPool, pPage);
2317
2318 int rc = VINF_SUCCESS;
2319 if (iUser != NIL_PGMPOOL_IDX)
2320 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2321 if (RT_SUCCESS(rc))
2322 {
2323 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2324 *ppPage = pPage;
2325 if (pPage->cModifications)
2326 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2327 STAM_COUNTER_INC(&pPool->StatCacheHits);
2328 return VINF_PGM_CACHED_PAGE;
2329 }
2330 return rc;
2331 }
2332
2333 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2334 {
2335 /*
2336 * The kind is different. In some cases we should now flush the page
2337 * as it has been reused, but in most cases this is normal remapping
2338 * of PDs as PT or big pages using the GCPhys field in a slightly
2339 * different way than the other kinds.
2340 */
2341 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2342 {
2343 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2344 pgmPoolFlushPage(pPool, pPage);
2345 break;
2346 }
2347 }
2348 }
2349
2350 /* next */
2351 i = pPage->iNext;
2352 } while (i != NIL_PGMPOOL_IDX);
2353 }
2354
2355 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2356 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2357 return VERR_FILE_NOT_FOUND;
2358}
2359
2360
2361/**
2362 * Inserts a page into the cache.
2363 *
2364 * @param pPool The pool.
2365 * @param pPage The cached page.
2366 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2367 */
2368static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2369{
2370 /*
2371 * Insert into the GCPhys hash if the page is fit for that.
2372 */
2373 Assert(!pPage->fCached);
2374 if (fCanBeCached)
2375 {
2376 pPage->fCached = true;
2377 pgmPoolHashInsert(pPool, pPage);
2378 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2379 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2380 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2381 }
2382 else
2383 {
2384 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2385 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2386 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2387 }
2388
2389 /*
2390 * Insert at the head of the age list.
2391 */
2392 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2393 pPage->iAgeNext = pPool->iAgeHead;
2394 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2395 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2396 else
2397 pPool->iAgeTail = pPage->idx;
2398 pPool->iAgeHead = pPage->idx;
2399}
2400
2401
2402/**
2403 * Flushes a cached page.
2404 *
2405 * @param pPool The pool.
2406 * @param pPage The cached page.
2407 */
2408static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2409{
2410 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2411
2412 /*
2413 * Remove the page from the hash.
2414 */
2415 if (pPage->fCached)
2416 {
2417 pPage->fCached = false;
2418 pgmPoolHashRemove(pPool, pPage);
2419 }
2420 else
2421 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2422
2423 /*
2424 * Remove it from the age list.
2425 */
2426 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2427 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2428 else
2429 pPool->iAgeTail = pPage->iAgePrev;
2430 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2431 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2432 else
2433 pPool->iAgeHead = pPage->iAgeNext;
2434 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2435 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2436}
2437
2438
2439/**
2440 * Looks for pages sharing the monitor.
2441 *
2442 * @returns Pointer to the head page.
2443 * @returns NULL if not found.
2444 * @param pPool The Pool
2445 * @param pNewPage The page which is going to be monitored.
2446 */
2447static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2448{
2449 /*
2450 * Look up the GCPhys in the hash.
2451 */
2452 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2453 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2454 if (i == NIL_PGMPOOL_IDX)
2455 return NULL;
2456 do
2457 {
2458 PPGMPOOLPAGE pPage = &pPool->aPages[i];
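        /* GCPhys is page aligned, so this unsigned compare matches any pool page backed
         * by the same guest page, whatever its sub-page offset (such pages share one
         * monitor). */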
2459 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2460 && pPage != pNewPage)
2461 {
2462 switch (pPage->enmKind)
2463 {
2464 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2465 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2466 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2467 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2468 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2469 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2470 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2471 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2472 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2473 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2474 case PGMPOOLKIND_64BIT_PML4:
2475 case PGMPOOLKIND_32BIT_PD:
2476 case PGMPOOLKIND_PAE_PDPT:
2477 {
2478 /* find the head */
2479 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2480 {
2481 Assert(pPage->iMonitoredPrev != pPage->idx);
2482 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2483 }
2484 return pPage;
2485 }
2486
2487 /* ignore, no monitoring. */
2488 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2489 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2490 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2491 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2492 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2493 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2494 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2495 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2496 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2497 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2498 case PGMPOOLKIND_ROOT_NESTED:
2499 case PGMPOOLKIND_PAE_PD_PHYS:
2500 case PGMPOOLKIND_PAE_PDPT_PHYS:
2501 case PGMPOOLKIND_32BIT_PD_PHYS:
2502 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2503 break;
2504 default:
2505 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2506 }
2507 }
2508
2509 /* next */
2510 i = pPage->iNext;
2511 } while (i != NIL_PGMPOOL_IDX);
2512 return NULL;
2513}
2514
2515
2516/**
2517 * Enables write monitoring of a guest page.
2518 *
2519 * @returns VBox status code.
2520 * @retval VINF_SUCCESS on success.
2521 * @param pPool The pool.
2522 * @param pPage The cached page.
2523 */
2524static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2525{
2526 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2527
2528 /*
2529 * Filter out the relevant kinds.
2530 */
2531 switch (pPage->enmKind)
2532 {
2533 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2534 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2535 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2536 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2537 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2538 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2539 case PGMPOOLKIND_64BIT_PML4:
2540 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2541 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2542 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2543 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2544 case PGMPOOLKIND_32BIT_PD:
2545 case PGMPOOLKIND_PAE_PDPT:
2546 break;
2547
2548 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2549 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2550 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2551 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2552 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2553 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2554 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2555 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2556 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2557 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2558 case PGMPOOLKIND_ROOT_NESTED:
2559 /* Nothing to monitor here. */
2560 return VINF_SUCCESS;
2561
2562 case PGMPOOLKIND_32BIT_PD_PHYS:
2563 case PGMPOOLKIND_PAE_PDPT_PHYS:
2564 case PGMPOOLKIND_PAE_PD_PHYS:
2565 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2566 /* Nothing to monitor here. */
2567 return VINF_SUCCESS;
2568 default:
2569 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2570 }
2571
2572 /*
2573 * Install handler.
2574 */
2575 int rc;
2576 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2577 if (pPageHead)
2578 {
2579 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2580 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2581
2582#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2583 if (pPageHead->fDirty)
2584 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2585#endif
2586
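        /* A physical access handler is already installed for this guest page (by the
         * chain head), so just link the new page into the monitored chain after the head. */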
2587 pPage->iMonitoredPrev = pPageHead->idx;
2588 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2589 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2590 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2591 pPageHead->iMonitoredNext = pPage->idx;
2592 rc = VINF_SUCCESS;
2593 }
2594 else
2595 {
2596 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2597 PVM pVM = pPool->CTX_SUFF(pVM);
2598 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2599 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2600 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2601 NIL_RTR3PTR /*pszDesc*/);
2602 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2603 * the heap size should suffice. */
2604 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2605 PVMCPU pVCpu = VMMGetCpu(pVM);
2606 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2607 }
2608 pPage->fMonitored = true;
2609 return rc;
2610}
2611
2612
2613/**
2614 * Disables write monitoring of a guest page.
2615 *
2616 * @returns VBox status code.
2617 * @retval VINF_SUCCESS on success.
2618 * @param pPool The pool.
2619 * @param pPage The cached page.
2620 */
2621static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2622{
2623 /*
2624 * Filter out the relevant kinds.
2625 */
2626 switch (pPage->enmKind)
2627 {
2628 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2629 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2630 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2631 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2632 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2633 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2634 case PGMPOOLKIND_64BIT_PML4:
2635 case PGMPOOLKIND_32BIT_PD:
2636 case PGMPOOLKIND_PAE_PDPT:
2637 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2638 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2639 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2640 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2641 break;
2642
2643 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2644 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2645 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2646 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2647 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2648 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2649 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2650 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2651 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2652 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2653 case PGMPOOLKIND_ROOT_NESTED:
2654 case PGMPOOLKIND_PAE_PD_PHYS:
2655 case PGMPOOLKIND_PAE_PDPT_PHYS:
2656 case PGMPOOLKIND_32BIT_PD_PHYS:
2657 /* Nothing to monitor here. */
2658 Assert(!pPage->fMonitored);
2659 return VINF_SUCCESS;
2660
2661 default:
2662 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2663 }
2664 Assert(pPage->fMonitored);
2665
2666 /*
2667 * Remove the page from the monitored list or uninstall it if last.
2668 */
2669 const PVM pVM = pPool->CTX_SUFF(pVM);
2670 int rc;
2671 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2672 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2673 {
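        /* The page shares its monitor with other pages. If it is the chain head, promote
         * the next page to head and re-point the handler's user arguments at it; otherwise
         * simply unlink it from the chain. */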
2674 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2675 {
2676 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2677 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2678 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2679 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2680
2681 AssertFatalRCSuccess(rc);
2682 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2683 }
2684 else
2685 {
2686 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2687 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2688 {
2689 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2690 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2691 }
2692 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2693 rc = VINF_SUCCESS;
2694 }
2695 }
2696 else
2697 {
2698 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2699 AssertFatalRC(rc);
2700 PVMCPU pVCpu = VMMGetCpu(pVM);
2701 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2702 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2703 }
2704 pPage->fMonitored = false;
2705
2706 /*
2707 * Remove it from the list of modified pages (if in it).
2708 */
2709 pgmPoolMonitorModifiedRemove(pPool, pPage);
2710
2711 return rc;
2712}
2713
2714
2715/**
2716 * Inserts the page into the list of modified pages.
2717 *
2718 * @param pPool The pool.
2719 * @param pPage The page.
2720 */
2721void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2722{
2723 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2724 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2725 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2726 && pPool->iModifiedHead != pPage->idx,
2727 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2728 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2729 pPool->iModifiedHead, pPool->cModifiedPages));
2730
2731 pPage->iModifiedNext = pPool->iModifiedHead;
2732 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2733 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2734 pPool->iModifiedHead = pPage->idx;
2735 pPool->cModifiedPages++;
2736#ifdef VBOX_WITH_STATISTICS
2737 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2738 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2739#endif
2740}
2741
2742
2743/**
2744 * Removes the page from the list of modified pages and resets the
2745 * modification counter.
2746 *
2747 * @param pPool The pool.
2748 * @param pPage The page which is believed to be in the list of modified pages.
2749 */
2750static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2751{
2752 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2753 if (pPool->iModifiedHead == pPage->idx)
2754 {
2755 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2756 pPool->iModifiedHead = pPage->iModifiedNext;
2757 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2758 {
2759 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2760 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2761 }
2762 pPool->cModifiedPages--;
2763 }
2764 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2765 {
2766 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2767 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2768 {
2769 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2770 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2771 }
2772 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2773 pPool->cModifiedPages--;
2774 }
2775 else
2776 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2777 pPage->cModifications = 0;
2778}
2779
2780
2781/**
2782 * Zaps the list of modified pages, resetting their modification counters in the process.
2783 *
2784 * @param pVM The cross context VM structure.
2785 */
2786static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2787{
2788 pgmLock(pVM);
2789 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2790 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2791
2792 unsigned cPages = 0; NOREF(cPages);
2793
2794#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2795 pgmPoolResetDirtyPages(pVM);
2796#endif
2797
2798 uint16_t idx = pPool->iModifiedHead;
2799 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2800 while (idx != NIL_PGMPOOL_IDX)
2801 {
2802 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2803 idx = pPage->iModifiedNext;
2804 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2805 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2806 pPage->cModifications = 0;
2807 Assert(++cPages);
2808 }
2809 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2810 pPool->cModifiedPages = 0;
2811 pgmUnlock(pVM);
2812}
2813
2814
2815/**
2816 * Handle SyncCR3 pool tasks
2817 *
2818 * @returns VBox status code.
2819 * @retval VINF_SUCCESS on success.
2820 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2821 * @param pVCpu The cross context virtual CPU structure.
2822 * @remark Should only be used when monitoring is available, thus placed in
2823 * the PGMPOOL_WITH_MONITORING \#ifdef.
2824 */
2825int pgmPoolSyncCR3(PVMCPU pVCpu)
2826{
2827 PVM pVM = pVCpu->CTX_SUFF(pVM);
2828 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2829
2830 /*
2831 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2832 * Occasionally we will have to clear all the shadow page tables because we wanted
2833 * to monitor a page which was mapped by too many shadowed page tables. This operation
2834 * is sometimes referred to as a 'lightweight flush'.
2835 */
2836# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2837 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2838 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2839# else /* !IN_RING3 */
2840 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2841 {
2842 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2843 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2844
2845 /* Make sure all other VCPUs return to ring 3. */
2846 if (pVM->cCpus > 1)
2847 {
2848 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2849 PGM_INVL_ALL_VCPU_TLBS(pVM);
2850 }
2851 return VINF_PGM_SYNC_CR3;
2852 }
2853# endif /* !IN_RING3 */
2854 else
2855 {
2856 pgmPoolMonitorModifiedClearAll(pVM);
2857
2858 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2859 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2860 {
2861 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2862 return pgmPoolSyncCR3(pVCpu);
2863 }
2864 }
2865 return VINF_SUCCESS;
2866}
2867
2868
2869/**
2870 * Frees up at least one user entry.
2871 *
2872 * @returns VBox status code.
2873 * @retval VINF_SUCCESS on success.
2874 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2875 * @param pPool The pool.
2876 * @param iUser The user index.
2877 */
2878static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2879{
2880 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2881 /*
2882 * Just free cached pages in a braindead fashion.
2883 */
2884 /** @todo walk the age list backwards and free the first with usage. */
2885 int rc = VINF_SUCCESS;
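    /* Evicting a cached page returns its user records to the free list, so keep
     * evicting until at least one record becomes available. */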
2886 do
2887 {
2888 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2889 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2890 rc = rc2;
2891 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2892 return rc;
2893}
2894
2895
2896/**
2897 * Inserts a page into the cache.
2898 *
2899 * This will create user node for the page, insert it into the GCPhys
2900 * hash, and insert it into the age list.
2901 *
2902 * @returns VBox status code.
2903 * @retval VINF_SUCCESS if successfully added.
2904 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2905 * @param pPool The pool.
2906 * @param pPage The cached page.
2907 * @param GCPhys The GC physical address of the page we're gonna shadow.
2908 * @param iUser The user index.
2909 * @param iUserTable The user table index.
2910 */
2911DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2912{
2913 int rc = VINF_SUCCESS;
2914 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2915
2916 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2917
2918 if (iUser != NIL_PGMPOOL_IDX)
2919 {
2920#ifdef VBOX_STRICT
2921 /*
2922     * Check that the entry doesn't already exist.
2923 */
2924 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2925 {
2926 uint16_t i = pPage->iUserHead;
2927 do
2928 {
2929 Assert(i < pPool->cMaxUsers);
2930 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2931 i = paUsers[i].iNext;
2932 } while (i != NIL_PGMPOOL_USER_INDEX);
2933 }
2934#endif
2935
2936 /*
2937     * Find a free user node.
2938 */
2939 uint16_t i = pPool->iUserFreeHead;
2940 if (i == NIL_PGMPOOL_USER_INDEX)
2941 {
2942 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2943 if (RT_FAILURE(rc))
2944 return rc;
2945 i = pPool->iUserFreeHead;
2946 }
2947
2948 /*
2949 * Unlink the user node from the free list,
2950 * initialize and insert it into the user list.
2951 */
2952 pPool->iUserFreeHead = paUsers[i].iNext;
2953 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2954 paUsers[i].iUser = iUser;
2955 paUsers[i].iUserTable = iUserTable;
2956 pPage->iUserHead = i;
2957 }
2958 else
2959 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2960
2961
2962 /*
2963 * Insert into cache and enable monitoring of the guest page if enabled.
2964 *
2965 * Until we implement caching of all levels, including the CR3 one, we'll
2966 * have to make sure we don't try monitor & cache any recursive reuse of
2967 * a monitored CR3 page. Because all windows versions are doing this we'll
2968 * have to be able to do combined access monitoring, CR3 + PT and
2969 * PD + PT (guest PAE).
2970 *
2971 * Update:
2972 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2973 */
2974 const bool fCanBeMonitored = true;
2975 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2976 if (fCanBeMonitored)
2977 {
2978 rc = pgmPoolMonitorInsert(pPool, pPage);
2979 AssertRC(rc);
2980 }
2981 return rc;
2982}
2983
2984
2985/**
2986 * Adds a user reference to a page.
2987 *
2988 * This will move the page to the head of the age list.
2989 *
2990 * @returns VBox status code.
2991 * @retval VINF_SUCCESS if successfully added.
2992 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2993 * @param pPool The pool.
2994 * @param pPage The cached page.
2995 * @param iUser The user index.
2996 * @param iUserTable The user table.
2997 */
2998static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2999{
3000 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3001 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3002 Assert(iUser != NIL_PGMPOOL_IDX);
3003
3004# ifdef VBOX_STRICT
3005 /*
3006 * Check that the entry doesn't already exist. We only allow multiple
3007 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3008 */
3009 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3010 {
3011 uint16_t i = pPage->iUserHead;
3012 do
3013 {
3014 Assert(i < pPool->cMaxUsers);
3015 /** @todo this assertion looks odd... Shouldn't it be && here? */
3016 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3017 i = paUsers[i].iNext;
3018 } while (i != NIL_PGMPOOL_USER_INDEX);
3019 }
3020# endif
3021
3022 /*
3023 * Allocate a user node.
3024 */
3025 uint16_t i = pPool->iUserFreeHead;
3026 if (i == NIL_PGMPOOL_USER_INDEX)
3027 {
3028 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3029 if (RT_FAILURE(rc))
3030 return rc;
3031 i = pPool->iUserFreeHead;
3032 }
3033 pPool->iUserFreeHead = paUsers[i].iNext;
3034
3035 /*
3036 * Initialize the user node and insert it.
3037 */
3038 paUsers[i].iNext = pPage->iUserHead;
3039 paUsers[i].iUser = iUser;
3040 paUsers[i].iUserTable = iUserTable;
3041 pPage->iUserHead = i;
3042
3043# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3044 if (pPage->fDirty)
3045 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3046# endif
3047
3048 /*
3049 * Tell the cache to update its replacement stats for this page.
3050 */
3051 pgmPoolCacheUsed(pPool, pPage);
3052 return VINF_SUCCESS;
3053}
3054
3055
3056/**
3057 * Frees a user record associated with a page.
3058 *
3059 * This does not clear the entry in the user table, it simply returns the
3060 * user record to the chain of free records.
3061 *
3062 * @param pPool The pool.
3063 * @param pPage The shadow page.
3064 * @param iUser The shadow page pool index of the user table.
3065 * @param iUserTable The index into the user table (shadowed).
3066 *
3067 * @remarks Don't call this for root pages.
3068 */
3069static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3070{
3071 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3072 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3073 Assert(iUser != NIL_PGMPOOL_IDX);
3074
3075 /*
3076 * Unlink and free the specified user entry.
3077 */
3078
3079 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3080 uint16_t i = pPage->iUserHead;
3081 if ( i != NIL_PGMPOOL_USER_INDEX
3082 && paUsers[i].iUser == iUser
3083 && paUsers[i].iUserTable == iUserTable)
3084 {
3085 pPage->iUserHead = paUsers[i].iNext;
3086
3087 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3088 paUsers[i].iNext = pPool->iUserFreeHead;
3089 pPool->iUserFreeHead = i;
3090 return;
3091 }
3092
3093 /* General: Linear search. */
3094 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3095 while (i != NIL_PGMPOOL_USER_INDEX)
3096 {
3097 if ( paUsers[i].iUser == iUser
3098 && paUsers[i].iUserTable == iUserTable)
3099 {
3100 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3101 paUsers[iPrev].iNext = paUsers[i].iNext;
3102 else
3103 pPage->iUserHead = paUsers[i].iNext;
3104
3105 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3106 paUsers[i].iNext = pPool->iUserFreeHead;
3107 pPool->iUserFreeHead = i;
3108 return;
3109 }
3110 iPrev = i;
3111 i = paUsers[i].iNext;
3112 }
3113
3114 /* Fatal: didn't find it */
3115 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3116 iUser, iUserTable, pPage->GCPhys));
3117}
3118
3119
3120/**
3121 * Gets the entry size of a shadow table.
3122 *
3123 * @param enmKind The kind of page.
3124 *
3125 * @returns The size of the entry in bytes. That is, 4 or 8.
3126 * @returns If the kind is not for a table, an assertion is raised and 0 is
3127 * returned.
3128 */
3129DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3130{
3131 switch (enmKind)
3132 {
3133 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3134 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3135 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3136 case PGMPOOLKIND_32BIT_PD:
3137 case PGMPOOLKIND_32BIT_PD_PHYS:
3138 return 4;
3139
3140 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3141 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3142 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3143 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3144 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3145 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3146 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3147 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3148 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3149 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3150 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3151 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3152 case PGMPOOLKIND_64BIT_PML4:
3153 case PGMPOOLKIND_PAE_PDPT:
3154 case PGMPOOLKIND_ROOT_NESTED:
3155 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3156 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3157 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3158 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3159 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3160 case PGMPOOLKIND_PAE_PD_PHYS:
3161 case PGMPOOLKIND_PAE_PDPT_PHYS:
3162 return 8;
3163
3164 default:
3165 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3166 }
3167}
3168
3169
3170/**
3171 * Gets the entry size of a guest table.
3172 *
3173 * @param enmKind The kind of page.
3174 *
3175 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3176 * @returns If the kind is not for a table, an assertion is raised and 0 is
3177 * returned.
3178 */
3179DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3180{
3181 switch (enmKind)
3182 {
3183 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3185 case PGMPOOLKIND_32BIT_PD:
3186 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3187 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3188 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3189 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3190 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3191 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3192 return 4;
3193
3194 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3195 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3196 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3197 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3198 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3199 case PGMPOOLKIND_64BIT_PML4:
3200 case PGMPOOLKIND_PAE_PDPT:
3201 return 8;
3202
3203 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3204 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3205 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3206 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3207 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3208 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3209 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3210 case PGMPOOLKIND_ROOT_NESTED:
3211 case PGMPOOLKIND_PAE_PD_PHYS:
3212 case PGMPOOLKIND_PAE_PDPT_PHYS:
3213 case PGMPOOLKIND_32BIT_PD_PHYS:
3214 /** @todo can we return 0? (nobody is calling this...) */
3215 AssertFailed();
3216 return 0;
3217
3218 default:
3219 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3220 }
3221}
3222
3223
3224/**
3225 * Checks one shadow page table entry for a mapping of a physical page.
3226 *
3227 * @returns true if the PTE(s) were kept (merely updated), false if all relevant PTEs were removed.
3228 *
3229 * @param pVM The cross context VM structure.
3230 * @param pPhysPage The guest page in question.
3231 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3232 * @param iShw The shadow page table.
3233 * @param   iPte        Page table entry index; NIL_PGMPOOL_PHYSEXT_IDX_PTE is not valid here (asserted below).
3234 */
3235static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3236{
3237 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3238 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3239 bool fRet = false;
3240
3241 /*
3242 * Assert sanity.
3243 */
3244 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3245 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3246 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3247
3248 /*
3249 * Then, clear the actual mappings to the page in the shadow PT.
3250 */
3251 switch (pPage->enmKind)
3252 {
3253 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3254 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3255 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3256 {
3257 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3258 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3259 uint32_t u32AndMask = 0;
3260 uint32_t u32OrMask = 0;
3261
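                    /* When the PTEs may be kept (fFlushPTEs is false), build AND/OR masks that
                       either restore write access (no active handler) or strip it (write handler
                       installed); a still-zero AND mask below means the entry is removed entirely. */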
3262 if (!fFlushPTEs)
3263 {
3264 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3265 {
3266                     case PGM_PAGE_HNDL_PHYS_STATE_NONE:         /* No handler installed. */
3267                     case PGM_PAGE_HNDL_PHYS_STATE_DISABLED:     /* Monitoring is temporarily disabled. */
3268 u32OrMask = X86_PTE_RW;
3269 u32AndMask = UINT32_MAX;
3270 fRet = true;
3271 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3272 break;
3273
3274                     case PGM_PAGE_HNDL_PHYS_STATE_WRITE:        /* Write access is monitored. */
3275 u32OrMask = 0;
3276 u32AndMask = ~X86_PTE_RW;
3277 fRet = true;
3278 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3279 break;
3280 default:
3281 /* (shouldn't be here, will assert below) */
3282 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3283 break;
3284 }
3285 }
3286 else
3287 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3288
3289 /* Update the counter if we're removing references. */
3290 if (!u32AndMask)
3291 {
3292 Assert(pPage->cPresent);
3293 Assert(pPool->cPresent);
3294 pPage->cPresent--;
3295 pPool->cPresent--;
3296 }
3297
3298 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3299 {
3300 X86PTE Pte;
3301
3302 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3303 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3304 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3305 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3306
3307 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3308 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3309 return fRet;
3310 }
3311#ifdef LOG_ENABLED
3312 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3313 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3314 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3315 {
3316 Log(("i=%d cFound=%d\n", i, ++cFound));
3317 }
3318#endif
3319 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3320 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3321 break;
3322 }
3323
3324 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3325 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3326 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3327 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3328 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3329 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3330 {
3331 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3332 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3333 uint64_t u64OrMask = 0;
3334 uint64_t u64AndMask = 0;
3335
3336 if (!fFlushPTEs)
3337 {
3338 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3339 {
3340 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3341 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3342 u64OrMask = X86_PTE_RW;
3343 u64AndMask = UINT64_MAX;
3344 fRet = true;
3345 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3346 break;
3347
3348 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3349 u64OrMask = 0;
3350 u64AndMask = ~(uint64_t)X86_PTE_RW;
3351 fRet = true;
3352 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3353 break;
3354
3355 default:
3356 /* (shouldn't be here, will assert below) */
3357 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3358 break;
3359 }
3360 }
3361 else
3362 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3363
3364 /* Update the counter if we're removing references. */
3365 if (!u64AndMask)
3366 {
3367 Assert(pPage->cPresent);
3368 Assert(pPool->cPresent);
3369 pPage->cPresent--;
3370 pPool->cPresent--;
3371 }
3372
3373 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3374 {
3375 X86PTEPAE Pte;
3376
3377 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3378 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3379 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3380 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3381
3382 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3383 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3384 return fRet;
3385 }
3386#ifdef LOG_ENABLED
3387 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3388 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3389 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3390 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3391 Log(("i=%d cFound=%d\n", i, ++cFound));
3392#endif
3393 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3394 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3395 break;
3396 }
3397
3398#ifdef PGM_WITH_LARGE_PAGES
3399 /* Large page case only. */
3400 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3401 {
3402 Assert(pVM->pgm.s.fNestedPaging);
3403
3404 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3405 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3406
3407 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3408 {
3409 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3410 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3411 pPD->a[iPte].u = 0;
3412 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3413
3414 /* Update the counter as we're removing references. */
3415 Assert(pPage->cPresent);
3416 Assert(pPool->cPresent);
3417 pPage->cPresent--;
3418 pPool->cPresent--;
3419
3420 return fRet;
3421 }
3422# ifdef LOG_ENABLED
3423 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3424 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3425 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3426 Log(("i=%d cFound=%d\n", i, ++cFound));
3427# endif
3428 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3429 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3430 break;
3431 }
3432
3433 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3434 case PGMPOOLKIND_PAE_PD_PHYS:
3435 {
3436 Assert(pVM->pgm.s.fNestedPaging);
3437
3438 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3439 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3440
3441 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3442 {
3443 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3444 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3445 pPD->a[iPte].u = 0;
3446 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3447
3448 /* Update the counter as we're removing references. */
3449 Assert(pPage->cPresent);
3450 Assert(pPool->cPresent);
3451 pPage->cPresent--;
3452 pPool->cPresent--;
3453 return fRet;
3454 }
3455# ifdef LOG_ENABLED
3456 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3457 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3458 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3459 Log(("i=%d cFound=%d\n", i, ++cFound));
3460# endif
3461 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3462 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3463 break;
3464 }
3465#endif /* PGM_WITH_LARGE_PAGES */
3466
3467 default:
3468 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3469 }
3470
3471 /* not reached. */
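        /* The return below is compiled out for MSC, presumably because that compiler
           treats the fatal assertion above as non-returning and would warn about
           unreachable code. */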
3472#ifndef _MSC_VER
3473 return fRet;
3474#endif
3475}
3476
3477
3478/**
3479 * Scans one shadow page table for mappings of a physical page.
3480 *
3481 * @param pVM The cross context VM structure.
3482 * @param pPhysPage The guest page in question.
3483 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3484 * @param iShw The shadow page table.
3485 */
3486static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3487{
3488 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3489
3490     /* We should only come here when there's only one reference to this physical page. */
3491 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3492
3493 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3494 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3495 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3496 if (!fKeptPTEs)
3497 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3498 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3499}
3500
3501
3502/**
3503 * Flushes a list of shadow page tables mapping the same physical page.
3504 *
3505 * @param pVM The cross context VM structure.
3506 * @param pPhysPage The guest page in question.
3507 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3508 * @param iPhysExt The physical cross reference extent list to flush.
3509 */
3510static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3511{
3512 PGM_LOCK_ASSERT_OWNER(pVM);
3513 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3514 bool fKeepList = false;
3515
3516 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3517 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3518
3519 const uint16_t iPhysExtStart = iPhysExt;
3520 PPGMPOOLPHYSEXT pPhysExt;
3521 do
3522 {
3523 Assert(iPhysExt < pPool->cMaxPhysExts);
3524 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3525 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3526 {
3527 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3528 {
3529 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3530 if (!fKeptPTEs)
3531 {
3532 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3533 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3534 }
3535 else
3536 fKeepList = true;
3537 }
3538 }
3539 /* next */
3540 iPhysExt = pPhysExt->iNext;
3541 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3542
3543 if (!fKeepList)
3544 {
3545 /* insert the list into the free list and clear the ram range entry. */
3546 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3547 pPool->iPhysExtFreeHead = iPhysExtStart;
3548 /* Invalidate the tracking data. */
3549 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3550 }
3551
3552 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3553}
3554
3555
3556/**
3557 * Flushes all shadow page table mappings of the given guest page.
3558 *
3559 * This is typically called when the host page backing the guest one has been
3560 * replaced or when the page protection was changed due to a guest access
3561 * caught by the monitoring.
3562 *
3563 * @returns VBox status code.
3564 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3565 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3566 * pool cleaning. FF and sync flags are set.
3567 *
3568 * @param pVM The cross context VM structure.
3569 * @param GCPhysPage GC physical address of the page in question
3570 * @param pPhysPage The guest page in question.
3571 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3572 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3573 *                      flushed; it is NOT touched if this isn't necessary.
3574 *                      The caller MUST initialize this to @a false.
3575 */
3576int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3577{
3578 PVMCPU pVCpu = VMMGetCpu(pVM);
3579 pgmLock(pVM);
3580 int rc = VINF_SUCCESS;
3581
3582#ifdef PGM_WITH_LARGE_PAGES
3583 /* Is this page part of a large page? */
3584 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3585 {
3586 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3587 GCPhysPage &= X86_PDE_PAE_PG_MASK;
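            /* GCPhysBase is the 2 MB aligned base of the large page; GCPhysPage keeps
               the 4 KB aligned address of the page that triggered this update. */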
3588
3589 /* Fetch the large page base. */
3590 PPGMPAGE pLargePage;
3591 if (GCPhysBase != GCPhysPage)
3592 {
3593 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3594 AssertFatal(pLargePage);
3595 }
3596 else
3597 pLargePage = pPhysPage;
3598
3599 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3600
3601 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3602 {
3603 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3604 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3605 pVM->pgm.s.cLargePagesDisabled++;
3606
3607             /* Update the base page as *only* that one has a reference and there's only one PDE to clear. */
3608 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3609
3610 *pfFlushTLBs = true;
3611 pgmUnlock(pVM);
3612 return rc;
3613 }
3614 }
3615#else
3616 NOREF(GCPhysPage);
3617#endif /* PGM_WITH_LARGE_PAGES */
3618
3619 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
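        /* The tracking word encodes a reference count and an index: when the count is
           PGMPOOL_TD_CREFS_PHYSEXT the index refers to a physical extent list (or is
           PGMPOOL_TD_IDX_OVERFLOWED); otherwise it is the single referencing shadow PT. */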
3620 if (u16)
3621 {
3622 /*
3623 * The zero page is currently screwing up the tracking and we'll
3624 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3625 * is defined, zero pages won't normally be mapped. Some kind of solution
3626 * will be needed for this problem of course, but it will have to wait...
3627 */
3628 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3629 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3630 rc = VINF_PGM_GCPHYS_ALIASED;
3631 else
3632 {
3633# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3634 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3635 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3636 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3637# endif
3638
3639 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3640 {
3641 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3642 pgmPoolTrackFlushGCPhysPT(pVM,
3643 pPhysPage,
3644 fFlushPTEs,
3645 PGMPOOL_TD_GET_IDX(u16));
3646 }
3647 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3648 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3649 else
3650 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3651 *pfFlushTLBs = true;
3652
3653# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3654 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3655# endif
3656 }
3657 }
3658
3659 if (rc == VINF_PGM_GCPHYS_ALIASED)
3660 {
3661 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3662 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3663 rc = VINF_PGM_SYNC_CR3;
3664 }
3665 pgmUnlock(pVM);
3666 return rc;
3667}
3668
3669
3670/**
3671 * Scans all shadow page tables for mappings of a physical page.
3672 *
3673 * This may be slow, but it's most likely more efficient than cleaning
3674 * out the entire page pool / cache.
3675 *
3676 * @returns VBox status code.
3677 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3678 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3679 * a page pool cleaning.
3680 *
3681 * @param pVM The cross context VM structure.
3682 * @param pPhysPage The guest page in question.
3683 */
3684int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3685{
3686 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3687 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3688 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3689 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3690
3691 /*
3692 * There is a limit to what makes sense.
3693 */
3694 if ( pPool->cPresent > 1024
3695 && pVM->cCpus == 1)
3696 {
3697 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3698 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3699 return VINF_PGM_GCPHYS_ALIASED;
3700 }
3701
3702 /*
3703     * Iterate all the pages until we've encountered all that are in use.
3704     * This is a simple but not quite optimal solution.
3705 */
3706 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3707 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3708 unsigned cLeft = pPool->cUsedPages;
3709 unsigned iPage = pPool->cCurPages;
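        /* Walk the pool from the top down towards PGMPOOL_IDX_FIRST, stopping early
           once we have seen as many in-use pages as the pool reports (cUsedPages). */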
3710 while (--iPage >= PGMPOOL_IDX_FIRST)
3711 {
3712 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3713 if ( pPage->GCPhys != NIL_RTGCPHYS
3714 && pPage->cPresent)
3715 {
3716 switch (pPage->enmKind)
3717 {
3718 /*
3719 * We only care about shadow page tables.
3720 */
3721 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3722 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3723 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3724 {
3725 unsigned cPresent = pPage->cPresent;
3726 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3727 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3728 if (pPT->a[i].n.u1Present)
3729 {
3730 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3731 {
3732 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3733 pPT->a[i].u = 0;
3734
3735 /* Update the counter as we're removing references. */
3736 Assert(pPage->cPresent);
3737 Assert(pPool->cPresent);
3738 pPage->cPresent--;
3739 pPool->cPresent--;
3740 }
3741 if (!--cPresent)
3742 break;
3743 }
3744 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3745 break;
3746 }
3747
3748 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3749 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3750 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3751 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3752 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3753 {
3754 unsigned cPresent = pPage->cPresent;
3755 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3756 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3757 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3758 {
3759 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3760 {
3761 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3762 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3763
3764 /* Update the counter as we're removing references. */
3765 Assert(pPage->cPresent);
3766 Assert(pPool->cPresent);
3767 pPage->cPresent--;
3768 pPool->cPresent--;
3769 }
3770 if (!--cPresent)
3771 break;
3772 }
3773 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3774 break;
3775 }
3776#ifndef IN_RC
3777 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3778 {
3779 unsigned cPresent = pPage->cPresent;
3780 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3781 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3782 if (pPT->a[i].n.u1Present)
3783 {
3784 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3785 {
3786 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3787 pPT->a[i].u = 0;
3788
3789 /* Update the counter as we're removing references. */
3790 Assert(pPage->cPresent);
3791 Assert(pPool->cPresent);
3792 pPage->cPresent--;
3793 pPool->cPresent--;
3794 }
3795 if (!--cPresent)
3796 break;
3797 }
3798 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3799 break;
3800 }
3801#endif
3802 }
3803 if (!--cLeft)
3804 break;
3805 }
3806 }
3807
3808 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3809 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3810
3811 /*
3812 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3813 */
3814 if (pPool->cPresent > 1024)
3815 {
3816 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3817 return VINF_PGM_GCPHYS_ALIASED;
3818 }
3819
3820 return VINF_SUCCESS;
3821}
3822
3823
3824/**
3825 * Clears the user entry in a user table.
3826 *
3827 * This is used to remove all references to a page when flushing it.
3828 */
3829static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3830{
3831 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3832 Assert(pUser->iUser < pPool->cCurPages);
3833 uint32_t iUserTable = pUser->iUserTable;
3834
3835 /*
3836 * Map the user page. Ignore references made by fictitious pages.
3837 */
3838 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3839 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
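        /* Overlay 32-bit and 64-bit views of the user (parent) table; which view gets
           written depends on the user page kind further down. */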
3840 union
3841 {
3842 uint64_t *pau64;
3843 uint32_t *pau32;
3844 } u;
3845 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3846 {
3847 Assert(!pUserPage->pvPageR3);
3848 return;
3849 }
3850 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3851
3852
3853 /* Safety precaution in case we change the paging for other modes too in the future. */
3854 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3855
3856#ifdef VBOX_STRICT
3857 /*
3858 * Some sanity checks.
3859 */
3860 switch (pUserPage->enmKind)
3861 {
3862 case PGMPOOLKIND_32BIT_PD:
3863 case PGMPOOLKIND_32BIT_PD_PHYS:
3864 Assert(iUserTable < X86_PG_ENTRIES);
3865 break;
3866 case PGMPOOLKIND_PAE_PDPT:
3867 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3868 case PGMPOOLKIND_PAE_PDPT_PHYS:
3869 Assert(iUserTable < 4);
3870 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3871 break;
3872 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3873 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3874 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3875 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3876 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3877 case PGMPOOLKIND_PAE_PD_PHYS:
3878 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3879 break;
3880 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3881 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3882 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3883 break;
3884 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3885 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3886 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3887 break;
3888 case PGMPOOLKIND_64BIT_PML4:
3889 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3890 /* GCPhys >> PAGE_SHIFT is the index here */
3891 break;
3892 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3893 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3894 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3895 break;
3896
3897 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3898 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3899 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3900 break;
3901
3902 case PGMPOOLKIND_ROOT_NESTED:
3903 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3904 break;
3905
3906 default:
3907 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3908 break;
3909 }
3910#endif /* VBOX_STRICT */
3911
3912 /*
3913 * Clear the entry in the user page.
3914 */
3915 switch (pUserPage->enmKind)
3916 {
3917 /* 32-bit entries */
3918 case PGMPOOLKIND_32BIT_PD:
3919 case PGMPOOLKIND_32BIT_PD_PHYS:
3920 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3921 break;
3922
3923 /* 64-bit entries */
3924 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3925 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3926 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3927 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3928 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3929#ifdef IN_RC
3930 /*
3931          * In 32-bit PAE mode we *must* invalidate the TLB when changing a
3932 * PDPT entry; the CPU fetches them only during cr3 load, so any
3933 * non-present PDPT will continue to cause page faults.
3934 */
3935 ASMReloadCR3();
3936 /* no break */
3937#endif
3938 case PGMPOOLKIND_PAE_PD_PHYS:
3939 case PGMPOOLKIND_PAE_PDPT_PHYS:
3940 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3941 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3942 case PGMPOOLKIND_64BIT_PML4:
3943 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3944 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3945 case PGMPOOLKIND_PAE_PDPT:
3946 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3947 case PGMPOOLKIND_ROOT_NESTED:
3948 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3949 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3950 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3951 break;
3952
3953 default:
3954 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3955 }
3956 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3957}
3958
3959
3960/**
3961 * Clears all users of a page.
3962 */
3963static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3964{
3965 /*
3966 * Free all the user records.
3967 */
3968 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3969
3970 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3971 uint16_t i = pPage->iUserHead;
3972 while (i != NIL_PGMPOOL_USER_INDEX)
3973 {
3974         /* Clear the entry in the user table. */
3975 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3976
3977 /* Free it. */
3978 const uint16_t iNext = paUsers[i].iNext;
3979 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3980 paUsers[i].iNext = pPool->iUserFreeHead;
3981 pPool->iUserFreeHead = i;
3982
3983 /* Next. */
3984 i = iNext;
3985 }
3986 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3987}
3988
3989
3990/**
3991 * Allocates a new physical cross reference extent.
3992 *
3993 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3994 * @param pVM The cross context VM structure.
3995 * @param piPhysExt Where to store the phys ext index.
3996 */
3997PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3998{
3999 PGM_LOCK_ASSERT_OWNER(pVM);
4000 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4001 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4002 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4003 {
4004 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4005 return NULL;
4006 }
4007 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4008 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4009 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4010 *piPhysExt = iPhysExt;
4011 return pPhysExt;
4012}
4013
4014
4015/**
4016 * Frees a physical cross reference extent.
4017 *
4018 * @param pVM The cross context VM structure.
4019 * @param iPhysExt The extent to free.
4020 */
4021void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4022{
4023 PGM_LOCK_ASSERT_OWNER(pVM);
4024 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4025 Assert(iPhysExt < pPool->cMaxPhysExts);
4026 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4027 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4028 {
4029 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4030 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4031 }
4032 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4033 pPool->iPhysExtFreeHead = iPhysExt;
4034}
4035
4036
4037/**
4038 * Frees a list of physical cross reference extents.
4039 *
4040 * @param pVM The cross context VM structure.
4041 * @param   iPhysExt    The index of the head of the extent list to free.
4042 */
4043void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4044{
4045 PGM_LOCK_ASSERT_OWNER(pVM);
4046 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4047
4048 const uint16_t iPhysExtStart = iPhysExt;
4049 PPGMPOOLPHYSEXT pPhysExt;
4050 do
4051 {
4052 Assert(iPhysExt < pPool->cMaxPhysExts);
4053 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4054 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4055 {
4056 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4057 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4058 }
4059
4060 /* next */
4061 iPhysExt = pPhysExt->iNext;
4062 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4063
4064 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4065 pPool->iPhysExtFreeHead = iPhysExtStart;
4066}
4067
4068
4069/**
4070 * Insert a reference into a list of physical cross reference extents.
4071 *
4072 * @returns The new tracking data for PGMPAGE.
4073 *
4074 * @param pVM The cross context VM structure.
4075 * @param iPhysExt The physical extent index of the list head.
4076 * @param iShwPT The shadow page table index.
4077 * @param iPte Page table entry
4078 *
4079 */
4080static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4081{
4082 PGM_LOCK_ASSERT_OWNER(pVM);
4083 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4084 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4085
4086 /*
4087 * Special common cases.
4088 */
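        /* Slot 0 of the head extent normally holds the initial reference made when the
           page was converted from a plain reference count, so try slots 1 and 2 first. */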
4089 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4090 {
4091 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4092 paPhysExts[iPhysExt].apte[1] = iPte;
4093 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4094 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4095 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4096 }
4097 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4098 {
4099 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4100 paPhysExts[iPhysExt].apte[2] = iPte;
4101 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4102 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4103 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4104 }
4105 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4106
4107 /*
4108 * General treatment.
4109 */
4110 const uint16_t iPhysExtStart = iPhysExt;
4111 unsigned cMax = 15;
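        /* Scan at most 15 extents; longer chains aren't worth tracking individually,
           so the whole list is freed and the page is marked as overflowed instead. */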
4112 for (;;)
4113 {
4114 Assert(iPhysExt < pPool->cMaxPhysExts);
4115 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4116 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4117 {
4118 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4119 paPhysExts[iPhysExt].apte[i] = iPte;
4120 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4121 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4122 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4123 }
4124 if (!--cMax)
4125 {
4126 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4127 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4128 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4129 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4130 }
4131
4132 /* advance */
4133 iPhysExt = paPhysExts[iPhysExt].iNext;
4134 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4135 break;
4136 }
4137
4138 /*
4139 * Add another extent to the list.
4140 */
4141 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4142 if (!pNew)
4143 {
4144 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4145 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4146 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4147 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4148 }
4149 pNew->iNext = iPhysExtStart;
4150 pNew->aidx[0] = iShwPT;
4151 pNew->apte[0] = iPte;
4152 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4153 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4154}
4155
4156
4157/**
4158 * Add a reference to a guest physical page where extents are in use.
4159 *
4160 * @returns The new tracking data for PGMPAGE.
4161 *
4162 * @param pVM The cross context VM structure.
4163 * @param pPhysPage Pointer to the aPages entry in the ram range.
4164 * @param u16 The ram range flags (top 16-bits).
4165 * @param iShwPT The shadow page table index.
4166 * @param iPte Page table entry
4167 */
4168uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4169{
4170 pgmLock(pVM);
4171 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4172 {
4173 /*
4174 * Convert to extent list.
4175 */
4176 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4177 uint16_t iPhysExt;
4178 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4179 if (pPhysExt)
4180 {
4181 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4182 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4183 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4184 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4185 pPhysExt->aidx[1] = iShwPT;
4186 pPhysExt->apte[1] = iPte;
4187 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4188 }
4189 else
4190 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4191 }
4192 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4193 {
4194 /*
4195 * Insert into the extent list.
4196 */
4197 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4198 }
4199 else
4200 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4201 pgmUnlock(pVM);
4202 return u16;
4203}
4204
4205
4206/**
4207 * Clear references to guest physical memory.
4208 *
4209 * @param pPool The pool.
4210 * @param pPage The page.
4211 * @param pPhysPage Pointer to the aPages entry in the ram range.
4212 * @param iPte Shadow PTE index
4213 */
4214void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4215{
4216 PVM pVM = pPool->CTX_SUFF(pVM);
4217 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4218 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4219
4220 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4221 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4222 {
4223 pgmLock(pVM);
4224
4225 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4226 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4227 do
4228 {
4229 Assert(iPhysExt < pPool->cMaxPhysExts);
4230
4231 /*
4232 * Look for the shadow page and check if it's all freed.
4233 */
4234 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4235 {
4236 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4237 && paPhysExts[iPhysExt].apte[i] == iPte)
4238 {
4239 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4240 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4241
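                        /* If any other slot in this extent is still in use we're done;
                           otherwise the node is unlinked and freed below. */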
4242 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4243 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4244 {
4245 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4246 pgmUnlock(pVM);
4247 return;
4248 }
4249
4250 /* we can free the node. */
4251 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4252 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4253 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4254 {
4255 /* lonely node */
4256 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4257 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4258 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4259 }
4260 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4261 {
4262 /* head */
4263 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4264 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4265 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4266 }
4267 else
4268 {
4269 /* in list */
4270 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4271 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4272 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4273 }
4274 iPhysExt = iPhysExtNext;
4275 pgmUnlock(pVM);
4276 return;
4277 }
4278 }
4279
4280 /* next */
4281 iPhysExtPrev = iPhysExt;
4282 iPhysExt = paPhysExts[iPhysExt].iNext;
4283 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4284
4285 pgmUnlock(pVM);
4286 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4287 }
4288 else /* nothing to do */
4289 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4290}
4291
4292/**
4293 * Clear references to guest physical memory.
4294 *
4295 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4296 * physical address is assumed to be correct, so the linear search can be
4297 * skipped and we can assert at an earlier point.
4298 *
4299 * @param pPool The pool.
4300 * @param pPage The page.
4301 * @param HCPhys The host physical address corresponding to the guest page.
4302 * @param GCPhys The guest physical address corresponding to HCPhys.
4303 * @param iPte Shadow PTE index
4304 */
4305static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4306{
4307 /*
4308 * Lookup the page and check if it checks out before derefing it.
4309 */
4310 PVM pVM = pPool->CTX_SUFF(pVM);
4311 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4312 if (pPhysPage)
4313 {
4314 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4315#ifdef LOG_ENABLED
4316 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4317 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4318#endif
4319 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4320 {
4321 Assert(pPage->cPresent);
4322 Assert(pPool->cPresent);
4323 pPage->cPresent--;
4324 pPool->cPresent--;
4325 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4326 return;
4327 }
4328
4329 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4330 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4331 }
4332 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4333}
4334
4335
4336/**
4337 * Clear references to guest physical memory.
4338 *
4339 * @param pPool The pool.
4340 * @param pPage The page.
4341 * @param HCPhys The host physical address corresponding to the guest page.
4342 * @param   GCPhysHint  The guest physical address which may correspond to HCPhys.
4343 * @param iPte Shadow pte index
4344 */
4345void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4346{
4347 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4348
4349 /*
4350 * Try the hint first.
4351 */
4352 RTHCPHYS HCPhysHinted;
4353 PVM pVM = pPool->CTX_SUFF(pVM);
4354 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4355 if (pPhysPage)
4356 {
4357 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4358 Assert(HCPhysHinted);
4359 if (HCPhysHinted == HCPhys)
4360 {
4361 Assert(pPage->cPresent);
4362 Assert(pPool->cPresent);
4363 pPage->cPresent--;
4364 pPool->cPresent--;
4365 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4366 return;
4367 }
4368 }
4369 else
4370 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4371
4372 /*
4373 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4374 */
4375 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4376 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4377 while (pRam)
4378 {
4379 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4380 while (iPage-- > 0)
4381 {
4382 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4383 {
4384 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4385 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4386 Assert(pPage->cPresent);
4387 Assert(pPool->cPresent);
4388 pPage->cPresent--;
4389 pPool->cPresent--;
4390 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4391 return;
4392 }
4393 }
4394 pRam = pRam->CTX_SUFF(pNext);
4395 }
4396
4397 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4398}
4399
4400
4401/**
4402 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4403 *
4404 * @param pPool The pool.
4405 * @param pPage The page.
4406 * @param pShwPT The shadow page table (mapping of the page).
4407 * @param pGstPT The guest page table.
4408 */
4409DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4410{
4411 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
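        /* When the guest's A20 gate is masked, bit 20 of the guest physical address
           must be cleared before it is used as a lookup hint. */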
4412 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4413 {
4414 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4415 if (pShwPT->a[i].n.u1Present)
4416 {
4417 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4418 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4419 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4420 if (!pPage->cPresent)
4421 break;
4422 }
4423 }
4424}
4425
4426
4427/**
4428 * Clear references to guest physical memory in a PAE / 32-bit page table.
4429 *
4430 * @param pPool The pool.
4431 * @param pPage The page.
4432 * @param pShwPT The shadow page table (mapping of the page).
4433 * @param pGstPT The guest page table (just a half one).
4434 */
4435DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4436{
4437 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4438 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4439 {
4440 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4441 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4442 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4443 {
4444 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4445 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4446 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4447 if (!pPage->cPresent)
4448 break;
4449 }
4450 }
4451}
4452
4453
4454/**
4455 * Clear references to guest physical memory in a PAE / PAE page table.
4456 *
4457 * @param pPool The pool.
4458 * @param pPage The page.
4459 * @param pShwPT The shadow page table (mapping of the page).
4460 * @param pGstPT The guest page table.
4461 */
4462DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4463{
4464 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4465 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4466 {
4467 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4468 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4469 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4470 {
4471             Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4472 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4473 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4474 if (!pPage->cPresent)
4475 break;
4476 }
4477 }
4478}
4479
4480
4481/**
4482 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4483 *
4484 * @param pPool The pool.
4485 * @param pPage The page.
4486 * @param pShwPT The shadow page table (mapping of the page).
4487 */
4488DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4489{
4490 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4491 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
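        /* pPage->GCPhys is the base of the big-page region this PT shadows, so the
           guest address of each entry is computed directly (no hint lookup needed). */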
4492 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4493 {
4494 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4495 if (pShwPT->a[i].n.u1Present)
4496 {
4497 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4498 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4499 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4500 if (!pPage->cPresent)
4501 break;
4502 }
4503 }
4504}
4505
4506
4507/**
4508 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4509 *
4510 * @param pPool The pool.
4511 * @param pPage The page.
4512 * @param pShwPT The shadow page table (mapping of the page).
4513 */
4514DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4515{
4516 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4517 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4518 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4519 {
4520 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4521 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4522 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4523 {
4524 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4525 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4526 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4527 if (!pPage->cPresent)
4528 break;
4529 }
4530 }
4531}
4532
4533
4534/**
4535 * Clear references to shadowed pages in an EPT page table.
4536 *
4537 * @param pPool The pool.
4538 * @param pPage The page.
4539 * @param   pShwPT      The shadow page table (mapping of the
4540 * page).
4541 */
4542DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4543{
4544 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4545 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4546 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4547 {
4548 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4549 if (pShwPT->a[i].n.u1Present)
4550 {
4551 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4552 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4553 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4554 if (!pPage->cPresent)
4555 break;
4556 }
4557 }
4558}
4559
4560
4561/**
4562 * Clear references to shadowed pages in a 32-bit page directory.
4563 *
4564 * @param pPool The pool.
4565 * @param pPage The page.
4566 * @param pShwPD The shadow page directory (mapping of the page).
4567 */
4568DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4569{
4570 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4571 {
4572 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4573 if ( pShwPD->a[i].n.u1Present
4574 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4575 )
4576 {
4577 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4578 if (pSubPage)
4579 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4580 else
4581 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4582 }
4583 }
4584}
4585
4586
4587/**
4588 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4589 *
4590 * @param pPool The pool.
4591 * @param pPage The page.
4592 * @param pShwPD The shadow page directory (mapping of the page).
4593 */
4594DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4595{
4596 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4597 {
4598 if ( pShwPD->a[i].n.u1Present
4599 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4600 {
4601#ifdef PGM_WITH_LARGE_PAGES
4602 if (pShwPD->a[i].b.u1Size)
4603 {
4604 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4605 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4606 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4607 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4608 i);
4609 }
4610 else
4611#endif
4612 {
4613 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4614 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4615 if (pSubPage)
4616 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4617 else
4618 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4619 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4620 }
4621 }
4622 }
4623}
4624
4625
4626/**
4627 * Clear references to shadowed pages in a PAE page directory pointer table.
4628 *
4629 * @param pPool The pool.
4630 * @param pPage The page.
4631 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4632 */
4633DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4634{
4635 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4636 {
4637 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4638 if ( pShwPDPT->a[i].n.u1Present
4639 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4640 )
4641 {
4642 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4643 if (pSubPage)
4644 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4645 else
4646 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4647 }
4648 }
4649}
4650
4651
4652/**
4653 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4654 *
4655 * @param pPool The pool.
4656 * @param pPage The page.
4657 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4658 */
4659DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4660{
4661 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4662 {
4663 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4664 if (pShwPDPT->a[i].n.u1Present)
4665 {
4666 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4667 if (pSubPage)
4668 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4669 else
4670 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4671 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4672 }
4673 }
4674}
4675
4676
4677/**
4678 * Clear references to shadowed pages in a 64-bit level 4 page table.
4679 *
4680 * @param pPool The pool.
4681 * @param pPage The page.
4682 * @param   pShwPML4    The shadow level 4 page table (mapping of the page).
4683 */
4684DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4685{
4686 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4687 {
4688 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4689 if (pShwPML4->a[i].n.u1Present)
4690 {
4691 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4692 if (pSubPage)
4693 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4694 else
4695 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4696 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4697 }
4698 }
4699}
4700
4701
4702/**
4703 * Clear references to shadowed pages in an EPT page directory.
4704 *
4705 * @param pPool The pool.
4706 * @param pPage The page.
4707 * @param pShwPD The shadow page directory (mapping of the page).
4708 */
4709DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4710{
4711 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4712 {
4713 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4714 if (pShwPD->a[i].n.u1Present)
4715 {
4716#ifdef PGM_WITH_LARGE_PAGES
4717 if (pShwPD->a[i].b.u1Size)
4718 {
4719 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4720 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4721 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4722 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4723 i);
4724 }
4725 else
4726#endif
4727 {
4728 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4729 if (pSubPage)
4730 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4731 else
4732 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4733 }
4734 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4735 }
4736 }
4737}
4738
4739
4740/**
4741 * Clear references to shadowed pages in an EPT page directory pointer table.
4742 *
4743 * @param pPool The pool.
4744 * @param pPage The page.
4745 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4746 */
4747DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4748{
4749 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4750 {
4751 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4752 if (pShwPDPT->a[i].n.u1Present)
4753 {
4754 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4755 if (pSubPage)
4756 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4757 else
4758 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4759 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4760 }
4761 }
4762}
4763
4764
4765/**
4766 * Clears all references made by this page.
4767 *
4768 * This includes other shadow pages and GC physical addresses.
4769 *
4770 * @param pPool The pool.
4771 * @param pPage The page.
4772 */
4773static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4774{
4775 /*
4776 * Map the shadow page and take action according to the page kind.
4777 */
4778 PVM pVM = pPool->CTX_SUFF(pVM);
4779 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4780 switch (pPage->enmKind)
4781 {
4782 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4783 {
4784 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4785 void *pvGst;
4786 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4787 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4788 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4789 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4790 break;
4791 }
4792
4793 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4794 {
4795 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4796 void *pvGst;
4797 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4798 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4799 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4800 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4801 break;
4802 }
4803
4804 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4805 {
4806 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4807 void *pvGst;
4808 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4809 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4810 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4811 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4812 break;
4813 }
4814
4815 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4816 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4817 {
4818 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4819 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4820 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4821 break;
4822 }
4823
4824 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4825 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4826 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4827 {
4828 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4829 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4830 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4831 break;
4832 }
4833
4834 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4835 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4836 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4837 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4838 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4839 case PGMPOOLKIND_PAE_PD_PHYS:
4840 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4841 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4842 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4843 break;
4844
4845 case PGMPOOLKIND_32BIT_PD_PHYS:
4846 case PGMPOOLKIND_32BIT_PD:
4847 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4848 break;
4849
4850 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4851 case PGMPOOLKIND_PAE_PDPT:
4852 case PGMPOOLKIND_PAE_PDPT_PHYS:
4853 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4854 break;
4855
4856 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4857 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4858 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4859 break;
4860
4861 case PGMPOOLKIND_64BIT_PML4:
4862 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4863 break;
4864
4865 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4866 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4867 break;
4868
4869 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4870 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4871 break;
4872
4873 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4874 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4875 break;
4876
4877 default:
4878 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4879 }
4880
4881 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4882 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4883 ASMMemZeroPage(pvShw);
4884 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4885 pPage->fZeroed = true;
4886 Assert(!pPage->cPresent);
4887 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4888}
4889
4890
4891/**
4892 * Flushes a pool page.
4893 *
4894 * This moves the page to the free list after removing all user references to it.
4895 *
4896 * @returns VBox status code.
4897 * @retval VINF_SUCCESS on success.
4898 * @param pPool The pool.
4899 * @param pPage The shadow page.
4900 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4901 */
4902int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4903{
4904 PVM pVM = pPool->CTX_SUFF(pVM);
4905 bool fFlushRequired = false;
4906
4907 int rc = VINF_SUCCESS;
4908 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4909 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4910 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4911
4912 /*
4913 * Reject any attempts at flushing any of the special root pages (shall
4914 * not happen).
4915 */
4916 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4917 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4918 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4919 VINF_SUCCESS);
4920
4921 pgmLock(pVM);
4922
4923 /*
4924 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4925 */
4926 if (pgmPoolIsPageLocked(pPage))
4927 {
4928 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4929 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4930 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4931 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4932 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4933 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4934 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4935 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4936 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4937 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4938 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4939 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4940 pgmUnlock(pVM);
4941 return VINF_SUCCESS;
4942 }
4943
4944#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4945 /* Start a subset so we won't run out of mapping space. */
4946 PVMCPU pVCpu = VMMGetCpu(pVM);
4947 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4948#endif
4949
4950 /*
4951 * Mark the page as being in need of an ASMMemZeroPage().
4952 */
4953 pPage->fZeroed = false;
4954
4955#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4956 if (pPage->fDirty)
4957 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4958#endif
4959
4960 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4961 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4962 fFlushRequired = true;
4963
4964 /*
4965 * Clear the page.
4966 */
4967 pgmPoolTrackClearPageUsers(pPool, pPage);
4968 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4969 pgmPoolTrackDeref(pPool, pPage);
4970 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4971
4972 /*
4973 * Flush it from the cache.
4974 */
4975 pgmPoolCacheFlushPage(pPool, pPage);
4976
4977#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4978 /* Heavy stuff done. */
4979 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4980#endif
4981
4982 /*
4983 * Deregister the monitoring.
4984 */
4985 if (pPage->fMonitored)
4986 rc = pgmPoolMonitorFlush(pPool, pPage);
4987
4988 /*
4989 * Free the page.
4990 */
4991 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4992 pPage->iNext = pPool->iFreeHead;
4993 pPool->iFreeHead = pPage->idx;
4994 pPage->enmKind = PGMPOOLKIND_FREE;
4995 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4996 pPage->GCPhys = NIL_RTGCPHYS;
4997 pPage->fReusedFlushPending = false;
4998
4999 pPool->cUsedPages--;
5000
5001 /* Flush the TLBs of all VCPUs if required. */
5002 if ( fFlushRequired
5003 && fFlush)
5004 {
5005 PGM_INVL_ALL_VCPU_TLBS(pVM);
5006 }
5007
5008 pgmUnlock(pVM);
5009 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5010 return rc;
5011}
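/*
 * Usage sketch (illustrative only, not taken from this file; 'pMyPage' is a
 * hypothetical pool page pointer): a caller that needs to batch TLB
 * maintenance can pass fFlush=false and do the invalidation itself afterwards:
 *
 *      int rc = pgmPoolFlushPage(pPool, pMyPage, false);
 *      ...
 *      PGM_INVL_ALL_VCPU_TLBS(pVM);
 */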
5012
5013
5014/**
5015 * Frees a usage of a pool page.
5016 *
5017 * The caller is responsible for updating the user table so that it no longer
5018 * references the shadow page.
5019 *
5020 * @param pPool The pool.
5021 * @param pPage The shadow page.
5022 * @param iUser The shadow page pool index of the user table.
5023 * NIL_PGMPOOL_IDX for root pages.
5024 * @param iUserTable The index into the user table (shadowed). Ignored if
5025 * root page.
5026 */
5027void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5028{
5029 PVM pVM = pPool->CTX_SUFF(pVM);
5030
5031 STAM_PROFILE_START(&pPool->StatFree, a);
5032 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5033 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5034 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5035
5036 pgmLock(pVM);
5037 if (iUser != NIL_PGMPOOL_IDX)
5038 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5039 if (!pPage->fCached)
5040 pgmPoolFlushPage(pPool, pPage);
5041 pgmUnlock(pVM);
5042 STAM_PROFILE_STOP(&pPool->StatFree, a);
5043}
5044
5045
5046/**
5047 * Makes one or more free pages available.
5048 *
5049 * @returns VBox status code.
5050 * @retval VINF_SUCCESS on success.
5051 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5052 *
5053 * @param pPool The pool.
5054 * @param enmKind Page table kind
5055 * @param iUser The user of the page.
5056 */
5057static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5058{
5059 PVM pVM = pPool->CTX_SUFF(pVM);
5060 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5061 NOREF(enmKind);
5062
5063 /*
5064 * If the pool isn't fully grown yet, expand it.
5065 */
5066 if ( pPool->cCurPages < pPool->cMaxPages
5067#if defined(IN_RC)
5068 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5069 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5070 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5071#endif
5072 )
5073 {
5074 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5075#ifdef IN_RING3
5076 int rc = PGMR3PoolGrow(pVM);
5077#else
5078 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5079#endif
5080 if (RT_FAILURE(rc))
5081 return rc;
5082 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5083 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5084 return VINF_SUCCESS;
5085 }
5086
5087 /*
5088 * Free one cached page.
5089 */
5090 return pgmPoolCacheFreeOne(pPool, iUser);
5091}
5092
5093
5094/**
5095 * Allocates a page from the pool.
5096 *
5097 * This page may actually be a cached page and not in need of any processing
5098 * on the caller's part.
5099 *
5100 * @returns VBox status code.
5101 * @retval VINF_SUCCESS if a NEW page was allocated.
5102 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5103 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5104 *
5105 * @param pVM The cross context VM structure.
5106 * @param GCPhys The GC physical address of the page we're going to shadow.
5107 * For 4MB and 2MB PD entries, it's the first address the
5108 * shadow PT is covering.
5109 * @param enmKind The kind of mapping.
5110 * @param enmAccess Access type for the mapping (only relevant for big pages)
5111 * @param fA20Enabled Whether the A20 gate is enabled or not.
5112 * @param iUser The shadow page pool index of the user table. Root
5113 * pages should pass NIL_PGMPOOL_IDX.
5114 * @param iUserTable The index into the user table (shadowed). Ignored for
5115 * root pages (iUser == NIL_PGMPOOL_IDX).
5116 * @param fLockPage Lock the page
5117 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5118 */
5119int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5120 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5121{
5122 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5123 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5124 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5125 *ppPage = NULL;
5126 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5127 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5128 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5129
5130 pgmLock(pVM);
5131
5132 if (pPool->fCacheEnabled)
5133 {
5134 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5135 if (RT_SUCCESS(rc2))
5136 {
5137 if (fLockPage)
5138 pgmPoolLockPage(pPool, *ppPage);
5139 pgmUnlock(pVM);
5140 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5141 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5142 return rc2;
5143 }
5144 }
5145
5146 /*
5147 * Allocate a new one.
5148 */
5149 int rc = VINF_SUCCESS;
5150 uint16_t iNew = pPool->iFreeHead;
5151 if (iNew == NIL_PGMPOOL_IDX)
5152 {
5153 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5154 if (RT_FAILURE(rc))
5155 {
5156 pgmUnlock(pVM);
5157 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5158 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5159 return rc;
5160 }
5161 iNew = pPool->iFreeHead;
5162 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5163 }
5164
5165 /* unlink the free head */
5166 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5167 pPool->iFreeHead = pPage->iNext;
5168 pPage->iNext = NIL_PGMPOOL_IDX;
5169
5170 /*
5171 * Initialize it.
5172 */
5173 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5174 pPage->enmKind = enmKind;
5175 pPage->enmAccess = enmAccess;
5176 pPage->GCPhys = GCPhys;
5177 pPage->fA20Enabled = fA20Enabled;
5178 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5179 pPage->fMonitored = false;
5180 pPage->fCached = false;
5181 pPage->fDirty = false;
5182 pPage->fReusedFlushPending = false;
5183 pPage->cModifications = 0;
5184 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5185 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5186 pPage->cPresent = 0;
5187 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5188 pPage->idxDirtyEntry = 0;
5189 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5190 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5191 pPage->cLastAccessHandler = 0;
5192 pPage->cLocked = 0;
5193# ifdef VBOX_STRICT
5194 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5195# endif
5196
5197 /*
5198 * Insert into the tracking and cache. If this fails, free the page.
5199 */
5200 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5201 if (RT_FAILURE(rc3))
5202 {
5203 pPool->cUsedPages--;
5204 pPage->enmKind = PGMPOOLKIND_FREE;
5205 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5206 pPage->GCPhys = NIL_RTGCPHYS;
5207 pPage->iNext = pPool->iFreeHead;
5208 pPool->iFreeHead = pPage->idx;
5209 pgmUnlock(pVM);
5210 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5211 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5212 return rc3;
5213 }
5214
5215 /*
5216 * Commit the allocation, clear the page and return.
5217 */
5218#ifdef VBOX_WITH_STATISTICS
5219 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5220 pPool->cUsedPagesHigh = pPool->cUsedPages;
5221#endif
5222
5223 if (!pPage->fZeroed)
5224 {
5225 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5226 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5227 ASMMemZeroPage(pv);
5228 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5229 }
5230
5231 *ppPage = pPage;
5232 if (fLockPage)
5233 pgmPoolLockPage(pPool, pPage);
5234 pgmUnlock(pVM);
5235 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5236 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5237 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5238 return rc;
5239}
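/*
 * Usage sketch (illustrative only; GCPhysPT, pUserPage and iPde are
 * hypothetical locals of the caller): a caller shadowing a guest PAE page
 * table would allocate a pool page and check whether it came from the cache:
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
 *                            true, pUserPage->idx, iPde, false, &pShwPage);
 *      if (rc == VINF_PGM_CACHED_PAGE)
 *          ...   (the shadow PT already contains valid entries)
 *      else if (RT_SUCCESS(rc))
 *          ...   (freshly zeroed page; sync the entries from the guest PT)
 */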
5240
5241
5242/**
5243 * Frees a usage of a pool page.
5244 *
5245 * @param pVM The cross context VM structure.
5246 * @param HCPhys The HC physical address of the shadow page.
5247 * @param iUser The shadow page pool index of the user table.
5248 * NIL_PGMPOOL_IDX if root page.
5249 * @param iUserTable The index into the user table (shadowed). Ignored if
5250 * root page.
5251 */
5252void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5253{
5254 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5255 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5256 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5257}
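/*
 * Usage sketch (illustrative only; HCPhysPT, pShwPde and iPde are hypothetical
 * locals): when a caller tears down a shadow PDE it frees the page table the
 * entry references by its host physical address and its own user slot:
 *
 *      pgmPoolFree(pVM, HCPhysPT, pShwPde->idx, iPde);
 */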
5258
5259
5260/**
5261 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5262 *
5263 * @returns Pointer to the shadow page structure.
5264 * @param pPool The pool.
5265 * @param HCPhys The HC physical address of the shadow page.
5266 */
5267PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5268{
5269 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5270
5271 /*
5272 * Look up the page.
5273 */
5274 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5275
5276 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5277 return pPage;
5278}
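/*
 * Lookup sketch (illustrative only; 'Pde' is a hypothetical shadow PDE): the
 * AVL tree is keyed on the page-aligned host physical address, so callers
 * mask the table entry before looking up the owning pool page:
 *
 *      PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, Pde.u & X86_PTE_PAE_PG_MASK);
 */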
5279
5280
5281/**
5282 * Internal worker for finding a page for debugging purposes, no assertions.
5283 *
5284 * @returns Pointer to the shadow page structure. NULL if not found.
5285 * @param pPool The pool.
5286 * @param HCPhys The HC physical address of the shadow page.
5287 */
5288PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5289{
5290 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5291 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5292}
5293
5294#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5295
5296/**
5297 * Flush the specified page if present
5298 *
5299 * @param pVM The cross context VM structure.
5300 * @param GCPhys Guest physical address of the page to flush
5301 */
5302void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5303{
5304 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5305
5306 VM_ASSERT_EMT(pVM);
5307
5308 /*
5309 * Look up the GCPhys in the hash.
5310 */
5311 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5312 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5313 if (i == NIL_PGMPOOL_IDX)
5314 return;
5315
5316 do
5317 {
5318 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5319 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5320 {
5321 switch (pPage->enmKind)
5322 {
5323 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5324 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5325 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5326 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5327 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5328 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5329 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5330 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5331 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5332 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5333 case PGMPOOLKIND_64BIT_PML4:
5334 case PGMPOOLKIND_32BIT_PD:
5335 case PGMPOOLKIND_PAE_PDPT:
5336 {
5337 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5338#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5339 if (pPage->fDirty)
5340 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5341 else
5342#endif
5343 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5344 Assert(!pgmPoolIsPageLocked(pPage));
5345 pgmPoolMonitorChainFlush(pPool, pPage);
5346 return;
5347 }
5348
5349 /* ignore, no monitoring. */
5350 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5351 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5352 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5353 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5354 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5355 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5356 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5357 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5358 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5359 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5360 case PGMPOOLKIND_ROOT_NESTED:
5361 case PGMPOOLKIND_PAE_PD_PHYS:
5362 case PGMPOOLKIND_PAE_PDPT_PHYS:
5363 case PGMPOOLKIND_32BIT_PD_PHYS:
5364 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5365 break;
5366
5367 default:
5368 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5369 }
5370 }
5371
5372 /* next */
5373 i = pPage->iNext;
5374 } while (i != NIL_PGMPOOL_IDX);
5375 return;
5376}
5377
5378#endif /* IN_RING3 */
5379#ifdef IN_RING3
5380
5381/**
5382 * Reset CPU on hot plugging.
5383 *
5384 * @param pVM The cross context VM structure.
5385 * @param pVCpu The cross context virtual CPU structure.
5386 */
5387void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5388{
5389 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5390
5391 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5392 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5393 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5394}
5395
5396
5397/**
5398 * Flushes the entire cache.
5399 *
5400 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5401 * this and will execute this CR3 flush.
5402 *
5403 * @param pVM The cross context VM structure.
5404 */
5405void pgmR3PoolReset(PVM pVM)
5406{
5407 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5408
5409 PGM_LOCK_ASSERT_OWNER(pVM);
5410 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5411 LogFlow(("pgmR3PoolReset:\n"));
5412
5413 /*
5414 * If there are no pages in the pool, there is nothing to do.
5415 */
5416 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5417 {
5418 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5419 return;
5420 }
5421
5422 /*
5423 * Exit the shadow mode since we're going to clear everything,
5424 * including the root page.
5425 */
5426 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5427 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5428
5429 /*
5430 * Nuke the free list and reinsert all pages into it.
5431 */
5432 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5433 {
5434 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5435
5436 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5437 if (pPage->fMonitored)
5438 pgmPoolMonitorFlush(pPool, pPage);
5439 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5440 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5441 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5442 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5443 pPage->cModifications = 0;
5444 pPage->GCPhys = NIL_RTGCPHYS;
5445 pPage->enmKind = PGMPOOLKIND_FREE;
5446 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5447 Assert(pPage->idx == i);
5448 pPage->iNext = i + 1;
5449 pPage->fA20Enabled = true;
5450 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5451 pPage->fSeenNonGlobal = false;
5452 pPage->fMonitored = false;
5453 pPage->fDirty = false;
5454 pPage->fCached = false;
5455 pPage->fReusedFlushPending = false;
5456 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5457 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5458 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5459 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5460 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5461 pPage->cLastAccessHandler = 0;
5462 pPage->cLocked = 0;
5463#ifdef VBOX_STRICT
5464 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5465#endif
5466 }
5467 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5468 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5469 pPool->cUsedPages = 0;
5470
5471 /*
5472 * Zap and reinitialize the user records.
5473 */
5474 pPool->cPresent = 0;
5475 pPool->iUserFreeHead = 0;
5476 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5477 const unsigned cMaxUsers = pPool->cMaxUsers;
5478 for (unsigned i = 0; i < cMaxUsers; i++)
5479 {
5480 paUsers[i].iNext = i + 1;
5481 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5482 paUsers[i].iUserTable = 0xfffffffe;
5483 }
5484 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5485
5486 /*
5487 * Clear all the GCPhys links and rebuild the phys ext free list.
5488 */
5489 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5490 pRam;
5491 pRam = pRam->CTX_SUFF(pNext))
5492 {
5493 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5494 while (iPage-- > 0)
5495 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5496 }
5497
5498 pPool->iPhysExtFreeHead = 0;
5499 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5500 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5501 for (unsigned i = 0; i < cMaxPhysExts; i++)
5502 {
5503 paPhysExts[i].iNext = i + 1;
5504 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5505 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5506 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5507 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5508 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5509 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5510 }
5511 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5512
5513 /*
5514 * Just zap the modified list.
5515 */
5516 pPool->cModifiedPages = 0;
5517 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5518
5519 /*
5520 * Clear the GCPhys hash and the age list.
5521 */
5522 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5523 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5524 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5525 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5526
5527#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5528 /* Clear all dirty pages. */
5529 pPool->idxFreeDirtyPage = 0;
5530 pPool->cDirtyPages = 0;
5531 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5532 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5533#endif
5534
5535 /*
5536 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5537 */
5538 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5539 {
5540 /*
5541 * Re-enter the shadowing mode and assert Sync CR3 FF.
5542 */
5543 PVMCPU pVCpu = &pVM->aCpus[i];
5544 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5545 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5546 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5547 }
5548
5549 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5550}
5551
5552#endif /* IN_RING3 */
5553
5554#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5555/**
5556 * Stringifies a PGMPOOLKIND value.
5557 */
5558static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5559{
5560 switch ((PGMPOOLKIND)enmKind)
5561 {
5562 case PGMPOOLKIND_INVALID:
5563 return "PGMPOOLKIND_INVALID";
5564 case PGMPOOLKIND_FREE:
5565 return "PGMPOOLKIND_FREE";
5566 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5567 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5568 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5569 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5570 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5571 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5572 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5573 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5574 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5575 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5576 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5577 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5578 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5579 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5580 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5581 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5582 case PGMPOOLKIND_32BIT_PD:
5583 return "PGMPOOLKIND_32BIT_PD";
5584 case PGMPOOLKIND_32BIT_PD_PHYS:
5585 return "PGMPOOLKIND_32BIT_PD_PHYS";
5586 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5587 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5588 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5589 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5590 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5591 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5592 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5593 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5594 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5595 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5596 case PGMPOOLKIND_PAE_PD_PHYS:
5597 return "PGMPOOLKIND_PAE_PD_PHYS";
5598 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5599 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5600 case PGMPOOLKIND_PAE_PDPT:
5601 return "PGMPOOLKIND_PAE_PDPT";
5602 case PGMPOOLKIND_PAE_PDPT_PHYS:
5603 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5604 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5605 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5606 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5607 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5608 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5609 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5610 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5611 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5612 case PGMPOOLKIND_64BIT_PML4:
5613 return "PGMPOOLKIND_64BIT_PML4";
5614 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5615 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5616 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5617 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5618 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5619 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5620 case PGMPOOLKIND_ROOT_NESTED:
5621 return "PGMPOOLKIND_ROOT_NESTED";
5622 }
5623 return "Unknown kind!";
5624}
5625#endif /* LOG_ENABLED || VBOX_STRICT */
5626