VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 32851

Last change on this file since 32851 was 32746, checked in by vboxsync, 14 years ago

pgmPoolFlushDirtyPage,pgmPoolAddDirtyPage: Must use PGM_GCPHYS_2_PTR_EX instead of PGM_GCPHYS_2_PTR since PAE shadow pages for 32-bit guest PT/PD only covers half a page. (PGM_GCPHYS_2_PTR requires page aligned requests)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 208.4 KB
Line 
1/* $Id: PGMAllPool.cpp 32746 2010-09-24 07:56:47Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_RC
28# include <VBox/patm.h>
29#endif
30#include "../PGMInternal.h"
31#include <VBox/vm.h>
32#include "../PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/hwacc_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46RT_C_DECLS_BEGIN
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
51static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
52static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53#ifndef IN_RING3
54DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
55#endif
56#ifdef LOG_ENABLED
57static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
58#endif
59#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
60static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
61#endif
62
63int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
64PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
65void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
66void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
67
68RT_C_DECLS_END
69
70
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89
90
91/**
92 * Flushes a chain of pages sharing the same access monitor.
93 *
94 * @returns VBox status code suitable for scheduling.
95 * @param pPool The pool.
96 * @param pPage A page in the chain.
97 * @todo VBOXSTRICTRC
98 */
99int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
100{
101 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
102
103 /*
104 * Find the list head.
105 */
106 uint16_t idx = pPage->idx;
107 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
108 {
109 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
110 {
111 idx = pPage->iMonitoredPrev;
112 Assert(idx != pPage->idx);
113 pPage = &pPool->aPages[idx];
114 }
115 }
116
117 /*
118 * Iterate the list flushing each shadow page.
119 */
120 int rc = VINF_SUCCESS;
121 for (;;)
122 {
123 idx = pPage->iMonitoredNext;
124 Assert(idx != pPage->idx);
125 if (pPage->idx >= PGMPOOL_IDX_FIRST)
126 {
127 int rc2 = pgmPoolFlushPage(pPool, pPage);
128 AssertRC(rc2);
129 }
130 /* next */
131 if (idx == NIL_PGMPOOL_IDX)
132 break;
133 pPage = &pPool->aPages[idx];
134 }
135 return rc;
136}
137
138
139/**
140 * Wrapper for getting the current context pointer to the entry being modified.
141 *
142 * @returns VBox status code suitable for scheduling.
143 * @param pVM VM Handle.
144 * @param pvDst Destination address
145 * @param pvSrc Source guest virtual address.
146 * @param GCPhysSrc The source guest physical address.
147 * @param cb Size of data to read
148 */
149DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
150{
151#if defined(IN_RING3)
152 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
153 return VINF_SUCCESS;
154#else
155 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
156 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
157#endif
158}
159
160/**
161 * Process shadow entries before they are changed by the guest.
162 *
163 * For PT entries we will clear them. For PD entries, we'll simply check
164 * for mapping conflicts and set the SyncCR3 FF if found.
165 *
166 * @param pVCpu VMCPU handle
167 * @param pPool The pool.
168 * @param pPage The head page.
169 * @param GCPhysFault The guest physical fault address.
170 * @param uAddress In R0 and GC this is the guest context fault address (flat).
171 * In R3 this is the host context 'fault' address.
172 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
173 */
174void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
175{
176 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
177 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
178 PVM pVM = pPool->CTX_SUFF(pVM);
179
180 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))pvAddress, GCPhysFault, cbWrite));
181
182 for (;;)
183 {
184 union
185 {
186 void *pv;
187 PX86PT pPT;
188 PPGMSHWPTPAE pPTPae;
189 PX86PD pPD;
190 PX86PDPAE pPDPae;
191 PX86PDPT pPDPT;
192 PX86PML4 pPML4;
193 } uShw;
194
195 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
196
197 uShw.pv = NULL;
198 switch (pPage->enmKind)
199 {
200 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
201 {
202 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
203 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
204 const unsigned iShw = off / sizeof(X86PTE);
205 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
206 if (uShw.pPT->a[iShw].n.u1Present)
207 {
208 X86PTE GstPte;
209
210 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
211 AssertRC(rc);
212 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
213 pgmPoolTracDerefGCPhysHint(pPool, pPage,
214 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
215 GstPte.u & X86_PTE_PG_MASK,
216 iShw);
217 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
218 }
219 break;
220 }
221
222 /* page/2 sized */
223 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
224 {
225 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
226 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
227 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
228 {
229 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
230 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
231 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
232 {
233 X86PTE GstPte;
234 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
235 AssertRC(rc);
236
237 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
238 pgmPoolTracDerefGCPhysHint(pPool, pPage,
239 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
240 GstPte.u & X86_PTE_PG_MASK,
241 iShw);
242 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
243 }
244 }
245 break;
246 }
247
248 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
249 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
250 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
251 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
252 {
253 unsigned iGst = off / sizeof(X86PDE);
254 unsigned iShwPdpt = iGst / 256;
255 unsigned iShw = (iGst % 256) * 2;
256 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
257
258 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
259 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
260 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
261 {
262 for (unsigned i = 0; i < 2; i++)
263 {
264# ifndef IN_RING0
265 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
266 {
267 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
268 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
269 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
270 break;
271 }
272 else
273# endif /* !IN_RING0 */
274 if (uShw.pPDPae->a[iShw+i].n.u1Present)
275 {
276 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
277 pgmPoolFree(pVM,
278 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
279 pPage->idx,
280 iShw + i);
281 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
282 }
283
284 /* paranoia / a bit assumptive. */
285 if ( (off & 3)
286 && (off & 3) + cbWrite > 4)
287 {
288 const unsigned iShw2 = iShw + 2 + i;
289 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
290 {
291# ifndef IN_RING0
292 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
293 {
294 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
295 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
296 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
297 break;
298 }
299 else
300# endif /* !IN_RING0 */
301 if (uShw.pPDPae->a[iShw2].n.u1Present)
302 {
303 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
304 pgmPoolFree(pVM,
305 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
306 pPage->idx,
307 iShw2);
308 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
309 }
310 }
311 }
312 }
313 }
314 break;
315 }
316
317 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
318 {
319 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
320 const unsigned iShw = off / sizeof(X86PTEPAE);
321 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
322 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
323 {
324 X86PTEPAE GstPte;
325 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
326 AssertRC(rc);
327
328 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
329 pgmPoolTracDerefGCPhysHint(pPool, pPage,
330 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
331 GstPte.u & X86_PTE_PAE_PG_MASK,
332 iShw);
333 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
334 }
335
336 /* paranoia / a bit assumptive. */
337 if ( (off & 7)
338 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
339 {
340 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
341 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
342
343 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
344 {
345 X86PTEPAE GstPte;
346# ifdef IN_RING3
347 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
348# else
349 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
350# endif
351 AssertRC(rc);
352 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
353 pgmPoolTracDerefGCPhysHint(pPool, pPage,
354 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
355 GstPte.u & X86_PTE_PAE_PG_MASK,
356 iShw2);
357 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_32BIT_PD:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
367
368 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
369 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
370# ifndef IN_RING0
371 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
372 {
373 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
374 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
375 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
376 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
377 break;
378 }
379# endif /* !IN_RING0 */
380# ifndef IN_RING0
381 else
382# endif /* !IN_RING0 */
383 {
384 if (uShw.pPD->a[iShw].n.u1Present)
385 {
386 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
387 pgmPoolFree(pVM,
388 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
389 pPage->idx,
390 iShw);
391 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
392 }
393 }
394 /* paranoia / a bit assumptive. */
395 if ( (off & 3)
396 && (off & 3) + cbWrite > sizeof(X86PTE))
397 {
398 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
399 if ( iShw2 != iShw
400 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
401 {
402# ifndef IN_RING0
403 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
404 {
405 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
406 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
407 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
408 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
409 break;
410 }
411# endif /* !IN_RING0 */
412# ifndef IN_RING0
413 else
414# endif /* !IN_RING0 */
415 {
416 if (uShw.pPD->a[iShw2].n.u1Present)
417 {
418 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
419 pgmPoolFree(pVM,
420 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
421 pPage->idx,
422 iShw2);
423 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
424 }
425 }
426 }
427 }
428#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
429 if ( uShw.pPD->a[iShw].n.u1Present
430 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
431 {
432 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
433# ifdef IN_RC /* TLB load - we're pushing things a bit... */
434 ASMProbeReadByte(pvAddress);
435# endif
436 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
437 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
438 }
439#endif
440 break;
441 }
442
443 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
444 {
445 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
446 const unsigned iShw = off / sizeof(X86PDEPAE);
447 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
448#ifndef IN_RING0
449 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
450 {
451 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
452 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
453 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
454 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
455 break;
456 }
457#endif /* !IN_RING0 */
458 /*
459 * Causes trouble when the guest uses a PDE to refer to the whole page table level
460 * structure. (Invalidate here; faults later on when it tries to change the page
461 * table entries -> recheck; probably only applies to the RC case.)
462 */
463# ifndef IN_RING0
464 else
465# endif /* !IN_RING0 */
466 {
467 if (uShw.pPDPae->a[iShw].n.u1Present)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
470 pgmPoolFree(pVM,
471 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
472 pPage->idx,
473 iShw);
474 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
475 }
476 }
477 /* paranoia / a bit assumptive. */
478 if ( (off & 7)
479 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
480 {
481 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
482 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
483
484#ifndef IN_RING0
485 if ( iShw2 != iShw
486 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
487 {
488 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
489 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
490 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
491 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
492 break;
493 }
494#endif /* !IN_RING0 */
495# ifndef IN_RING0
496 else
497# endif /* !IN_RING0 */
498 if (uShw.pPDPae->a[iShw2].n.u1Present)
499 {
500 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
501 pgmPoolFree(pVM,
502 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
503 pPage->idx,
504 iShw2);
505 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
506 }
507 }
508 break;
509 }
510
511 case PGMPOOLKIND_PAE_PDPT:
512 {
513 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
514 /*
515 * Hopefully this doesn't happen very often:
516 * - touching unused parts of the page
517 * - messing with the bits of pd pointers without changing the physical address
518 */
519 /* PDPT roots are not page aligned; 32 byte only! */
520 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
521
522 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
523 const unsigned iShw = offPdpt / sizeof(X86PDPE);
524 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
525 {
526# ifndef IN_RING0
527 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
528 {
529 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
530 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
531 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
532 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
533 break;
534 }
535# endif /* !IN_RING0 */
536# ifndef IN_RING0
537 else
538# endif /* !IN_RING0 */
539 if (uShw.pPDPT->a[iShw].n.u1Present)
540 {
541 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
542 pgmPoolFree(pVM,
543 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
544 pPage->idx,
545 iShw);
546 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
547 }
548
549 /* paranoia / a bit assumptive. */
550 if ( (offPdpt & 7)
551 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
552 {
553 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
554 if ( iShw2 != iShw
555 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
556 {
557# ifndef IN_RING0
558 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
559 {
560 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
561 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
562 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
563 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
564 break;
565 }
566# endif /* !IN_RING0 */
567# ifndef IN_RING0
568 else
569# endif /* !IN_RING0 */
570 if (uShw.pPDPT->a[iShw2].n.u1Present)
571 {
572 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
573 pgmPoolFree(pVM,
574 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
575 pPage->idx,
576 iShw2);
577 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
578 }
579 }
580 }
581 }
582 break;
583 }
584
585#ifndef IN_RC
586 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
587 {
588 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
589 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
590 const unsigned iShw = off / sizeof(X86PDEPAE);
591 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
592 if (uShw.pPDPae->a[iShw].n.u1Present)
593 {
594 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
595 pgmPoolFree(pVM,
596 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
597 pPage->idx,
598 iShw);
599 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
600 }
601 /* paranoia / a bit assumptive. */
602 if ( (off & 7)
603 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
604 {
605 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
606 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
607
608 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
609 if (uShw.pPDPae->a[iShw2].n.u1Present)
610 {
611 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
612 pgmPoolFree(pVM,
613 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
614 pPage->idx,
615 iShw2);
616 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
617 }
618 }
619 break;
620 }
621
622 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
623 {
624 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
625 /*
626 * Hopefully this doesn't happen very often:
627 * - messing with the bits of pd pointers without changing the physical address
628 */
629 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
630 const unsigned iShw = off / sizeof(X86PDPE);
631 if (uShw.pPDPT->a[iShw].n.u1Present)
632 {
633 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
634 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
635 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
636 }
637 /* paranoia / a bit assumptive. */
638 if ( (off & 7)
639 && (off & 7) + cbWrite > sizeof(X86PDPE))
640 {
641 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
642 if (uShw.pPDPT->a[iShw2].n.u1Present)
643 {
644 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
645 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
646 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
647 }
648 }
649 break;
650 }
651
652 case PGMPOOLKIND_64BIT_PML4:
653 {
654 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
655 /*
656 * Hopefully this doesn't happen very often:
657 * - messing with the bits of pd pointers without changing the physical address
658 */
659 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
660 const unsigned iShw = off / sizeof(X86PDPE);
661 if (uShw.pPML4->a[iShw].n.u1Present)
662 {
663 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
664 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
665 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
666 }
667 /* paranoia / a bit assumptive. */
668 if ( (off & 7)
669 && (off & 7) + cbWrite > sizeof(X86PDPE))
670 {
671 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
672 if (uShw.pPML4->a[iShw2].n.u1Present)
673 {
674 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
675 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
676 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
677 }
678 }
679 break;
680 }
681#endif /* IN_RING0 */
682
683 default:
684 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
685 }
686 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
687
688 /* next */
689 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
690 return;
691 pPage = &pPool->aPages[pPage->iMonitoredNext];
692 }
693}
694
695# ifndef IN_RING3
696/**
697 * Checks if a access could be a fork operation in progress.
698 *
699 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
700 *
701 * @returns true if it's likly that we're forking, otherwise false.
702 * @param pPool The pool.
703 * @param pDis The disassembled instruction.
704 * @param offFault The access offset.
705 */
706DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
707{
708 /*
709 * i386 linux is using btr to clear X86_PTE_RW.
710 * The functions involved are (2.6.16 source inspection):
711 * clear_bit
712 * ptep_set_wrprotect
713 * copy_one_pte
714 * copy_pte_range
715 * copy_pmd_range
716 * copy_pud_range
717 * copy_page_range
718 * dup_mmap
719 * dup_mm
720 * copy_mm
721 * copy_process
722 * do_fork
723 */
724 if ( pDis->pCurInstr->opcode == OP_BTR
725 && !(offFault & 4)
726 /** @todo Validate that the bit index is X86_PTE_RW. */
727 )
728 {
729 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
730 return true;
731 }
732 return false;
733}
734
735
736/**
737 * Determine whether the page is likely to have been reused.
738 *
739 * @returns true if we consider the page as being reused for a different purpose.
740 * @returns false if we consider it to still be a paging page.
741 * @param pVM VM Handle.
742 * @param pVCpu VMCPU Handle.
743 * @param pRegFrame Trap register frame.
744 * @param pDis The disassembly info for the faulting instruction.
745 * @param pvFault The fault address.
746 *
747 * @remark The REP prefix check is left to the caller because of STOSD/W.
748 */
749DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
750{
751#ifndef IN_RC
752 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
753 if ( HWACCMHasPendingIrq(pVM)
754 && (pRegFrame->rsp - pvFault) < 32)
755 {
756 /* Fault caused by stack writes while trying to inject an interrupt event. */
757 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
758 return true;
759 }
760#else
761 NOREF(pVM); NOREF(pvFault);
762#endif
763
764 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
765
766 /* Non-supervisor mode write means it's used for something else. */
767 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
768 return true;
769
770 switch (pDis->pCurInstr->opcode)
771 {
772 /* call implies the actual push of the return address faulted */
773 case OP_CALL:
774 Log4(("pgmPoolMonitorIsReused: CALL\n"));
775 return true;
776 case OP_PUSH:
777 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
778 return true;
779 case OP_PUSHF:
780 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
781 return true;
782 case OP_PUSHA:
783 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
784 return true;
785 case OP_FXSAVE:
786 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
787 return true;
788 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
789 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
790 return true;
791 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
792 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
793 return true;
794 case OP_MOVSWD:
795 case OP_STOSWD:
796 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
797 && pRegFrame->rcx >= 0x40
798 )
799 {
800 Assert(pDis->mode == CPUMODE_64BIT);
801
802 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
803 return true;
804 }
805 return false;
806 }
807 if ( ( (pDis->param1.flags & USE_REG_GEN32)
808 || (pDis->param1.flags & USE_REG_GEN64))
809 && (pDis->param1.base.reg_gen == USE_REG_ESP))
810 {
811 Log4(("pgmPoolMonitorIsReused: ESP\n"));
812 return true;
813 }
814
815 return false;
816}
817
818/**
819 * Flushes the page being accessed.
820 *
821 * @returns VBox status code suitable for scheduling.
822 * @param pVM The VM handle.
823 * @param pVCpu The VMCPU handle.
824 * @param pPool The pool.
825 * @param pPage The pool page (head).
826 * @param pDis The disassembly of the write instruction.
827 * @param pRegFrame The trap register frame.
828 * @param GCPhysFault The fault address as guest physical address.
829 * @param pvFault The fault address.
830 * @todo VBOXSTRICTRC
831 */
832static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
833 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
834{
835 /*
836 * First, do the flushing.
837 */
838 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
839
840 /*
841 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
842 * Must do this in raw mode (!); XP boot will fail otherwise.
843 */
844 uint32_t cbWritten;
845 VBOXSTRICTRC rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cbWritten);
846 if (RT_SUCCESS(rc2))
847 {
848 pRegFrame->rip += pDis->opsize;
849 AssertMsg(rc2 == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
850 }
851 else if (rc2 == VERR_EM_INTERPRETER)
852 {
853#ifdef IN_RC
854 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
855 {
856 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
857 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
858 rc = VINF_SUCCESS;
859 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
860 }
861 else
862#endif
863 {
864 rc = VINF_EM_RAW_EMULATE_INSTR;
865 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
866 }
867 }
868 else
869 rc = VBOXSTRICTRC_VAL(rc2);
870
871 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
872 return rc;
873}
874
875/**
876 * Handles the STOSD write accesses.
877 *
878 * @returns VBox status code suitable for scheduling.
879 * @param pVM The VM handle.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 */
887DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
888 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
889{
890 unsigned uIncrement = pDis->param1.size;
891
892 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
893 Assert(pRegFrame->rcx <= 0x20);
894
895#ifdef VBOX_STRICT
896 if (pDis->opmode == CPUMODE_32BIT)
897 Assert(uIncrement == 4);
898 else
899 Assert(uIncrement == 8);
900#endif
901
902 Log3(("pgmPoolAccessHandlerSTOSD\n"));
903
904 /*
905 * Increment the modification counter and insert it into the list
906 * of modified pages the first time.
907 */
908 if (!pPage->cModifications++)
909 pgmPoolMonitorModifiedInsert(pPool, pPage);
910
911 /*
912 * Execute REP STOSD.
913 *
914 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
915 * write situation, meaning that it's safe to write here.
916 */
917 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
918 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
919 while (pRegFrame->rcx)
920 {
921#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
922 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
923 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
924 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
925#else
926 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
927#endif
928#ifdef IN_RC
929 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
930#else
931 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
932#endif
933 pu32 += uIncrement;
934 GCPhysFault += uIncrement;
935 pRegFrame->rdi += uIncrement;
936 pRegFrame->rcx--;
937 }
938 pRegFrame->rip += pDis->opsize;
939
940 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
941 return VINF_SUCCESS;
942}
943
944
945/**
946 * Handles the simple write accesses.
947 *
948 * @returns VBox status code suitable for scheduling.
949 * @param pVM The VM handle.
950 * @param pVCpu The VMCPU handle.
951 * @param pPool The pool.
952 * @param pPage The pool page (head).
953 * @param pDis The disassembly of the write instruction.
954 * @param pRegFrame The trap register frame.
955 * @param GCPhysFault The fault address as guest physical address.
956 * @param pvFault The fault address.
957 * @param pfReused Reused state (out)
958 */
959DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
960 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
961{
962 Log3(("pgmPoolAccessHandlerSimple\n"));
963 /*
964 * Increment the modification counter and insert it into the list
965 * of modified pages the first time.
966 */
967 if (!pPage->cModifications++)
968 pgmPoolMonitorModifiedInsert(pPool, pPage);
969
970 /*
971 * Clear all the pages. ASSUMES that pvFault is readable.
972 */
973#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
974 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
976 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
977#else
978 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
979#endif
980
981 /*
982 * Interpret the instruction.
983 */
984 uint32_t cb;
985 VBOXSTRICTRC rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL, &cb);
986 if (RT_SUCCESS(rc))
987 {
988 pRegFrame->rip += pDis->opsize;
989 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
990 }
991 else if (rc == VERR_EM_INTERPRETER)
992 {
993 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
994 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
995 rc = VINF_EM_RAW_EMULATE_INSTR;
996 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
997 }
998
999#if 0 /* experimental code */
1000 if (rc == VINF_SUCCESS)
1001 {
1002 switch (pPage->enmKind)
1003 {
1004 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1005 {
1006 X86PTEPAE GstPte;
1007 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1008 AssertRC(rc);
1009
1010 /* Check the new value written by the guest. If present and with a bogus physical address, then
1011 * it's fairly safe to assume the guest is reusing the PT.
1012 */
1013 if (GstPte.n.u1Present)
1014 {
1015 RTHCPHYS HCPhys = -1;
1016 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1017 if (rc != VINF_SUCCESS)
1018 {
1019 *pfReused = true;
1020 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1021 }
1022 }
1023 break;
1024 }
1025 }
1026 }
1027#endif
1028
1029 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", VBOXSTRICTRC_VAL(rc), cb));
1030 return VBOXSTRICTRC_VAL(rc);
1031}
1032
1033/**
1034 * \#PF Handler callback for PT write accesses.
1035 *
 * The PGM lock is taken right after entry and released on every return path.
 * A write to a monitored page is either emulated (plain instructions and
 * small REP STOS fills), handled by temporarily lifting the write protection
 * and tracking the page as dirty (ring-0 builds with
 * PGMPOOL_WITH_OPTIMIZED_DIRTY_PT only), or answered by flushing the pool
 * page.
 *
1036 * @returns VBox status code (appropriate for GC return).
1037 * @param pVM VM Handle.
1038 * @param uErrorCode CPU Error code.  Not referenced by this handler.
1039 * @param pRegFrame Trap register frame.
1040 * NULL on DMA and other non CPU access.
 * NOTE(review): dereferenced unconditionally below - confirm that
 * this handler is never reached with a NULL frame.
1041 * @param pvFault The fault address (cr2).
1042 * @param GCPhysFault The GC physical address corresponding to pvFault.
1043 * @param pvUser User argument.  Interpreted as the pool page (PPGMPOOLPAGE).
1044 */
1045DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1046{
1047 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1048 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1049 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1050 PVMCPU pVCpu = VMMGetCpu(pVM);
1051 unsigned cMaxModifications;
1052 bool fForcedFlush = false;
1053
1054 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1055
     /* Take the PGM lock; every return path below releases it again. */
1056 pgmLock(pVM);
1057 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1058 {
1059 /* Pool page changed while we were waiting for the lock; ignore. */
1060 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1061 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1062 pgmUnlock(pVM);
1063 return VINF_SUCCESS;
1064 }
1065#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
     /* NOTE(review): this exit and the disassembly failure exit further down
      * return without a STAM_PROFILE_STOP*, unlike the other return paths. */
1066 if (pPage->fDirty)
1067 {
1068 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1069 pgmUnlock(pVM);
1070 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1071 }
1072#endif
1073
1074#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1075 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1076 {
1077 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1078 void *pvGst;
1079 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1080 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1081 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1082 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1083 }
1084#endif
1085
1086 /*
1087 * Disassemble the faulting instruction.
1088 */
1089 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1090 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1091 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1092 {
1093 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1094 pgmUnlock(pVM);
1095 return rc;
1096 }
1097
1098 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1099
1100 /*
1101 * We should ALWAYS have the list head as user parameter. This
1102 * is because we use that page to record the changes.
1103 */
1104 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1105
     /* Modification budget: ring-0 uses 4 for the two PAE page table kinds and
      * 24 for everything else; the other contexts always use 48. */
1106#ifdef IN_RING0
1107 /* Maximum nr of modifications depends on the page type. */
1108 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1109 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1110 cMaxModifications = 4;
1111 else
1112 cMaxModifications = 24;
1113#else
1114 cMaxModifications = 48;
1115#endif
1116
1117 /*
1118 * Incremental page table updates should weigh more than random ones.
1119 * (Only applies when started from offset 0)
 *
 * The write is treated as the continuation of a sequential fill when RIP
 * lies within 0x40 bytes of the remembered RIP, the fault address is
 * exactly one operand size past the remembered fault address, and this
 * is the handler call directly following the remembered one on this
 * VCPU.  The modification count is doubled in that case.
1120 */
1121 pVCpu->pgm.s.cPoolAccessHandler++;
1122 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1123 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1124 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1125 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1126 {
1127 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1128 Assert(pPage->cModifications < 32000);
1129 pPage->cModifications = pPage->cModifications * 2;
1130 pPage->pvLastAccessHandlerFault = pvFault;
1131 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1132 if (pPage->cModifications >= cMaxModifications)
1133 {
     /* Budget exhausted by the doubling: fForcedFlush keeps the dirty page
      * optimization further down from being used for this write. */
1134 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1135 fForcedFlush = true;
1136 }
1137 }
1138
1139 if (pPage->cModifications >= cMaxModifications)
1140 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1141
1142 /*
1143 * Check if it's worth dealing with.
 *
 * The write is emulated when the budget is not exhausted (or the page is
 * locked) and neither the reuse nor the forking check triggers.  Note
 * that fReused is assigned inside the condition and is read again after
 * the flushPage label.
1144 */
1145 bool fReused = false;
1146 bool fNotReusedNotForking = false;
1147 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1148 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1149 )
1150 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1151 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1152 {
1153 /*
1154 * Simple instructions, no REP prefix.
1155 */
1156 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1157 {
1158 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1159 if (fReused)
1160 goto flushPage;
1161
1162 /* A mov instruction to change the first page table entry will be remembered so we can detect
1163 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1164 */
1165 if ( rc == VINF_SUCCESS
1166 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1167 && pDis->pCurInstr->opcode == OP_MOV
1168 && (pvFault & PAGE_OFFSET_MASK) == 0)
1169 {
1170 pPage->pvLastAccessHandlerFault = pvFault;
1171 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1172 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1173 /* Make sure we don't kick out a page too quickly. */
1174 if (pPage->cModifications > 8)
1175 pPage->cModifications = 2;
1176 }
1177 else
1178 if (pPage->pvLastAccessHandlerFault == pvFault)
1179 {
1180 /* ignore the 2nd write to this page table entry. */
1181 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1182 }
1183 else
1184 {
     /* Unrelated write: forget the remembered fault address and RIP. */
1185 pPage->pvLastAccessHandlerFault = 0;
1186 pPage->pvLastAccessHandlerRip = 0;
1187 }
1188
1189 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1190 pgmUnlock(pVM);
1191 return rc;
1192 }
1193
1194 /*
1195 * Windows is frequently doing small memset() operations (netio test 4k+).
1196 * We have to deal with these or we'll kill the cache and performance.
 *
 * Accepted are forward (DF clear) REP STOS with default operand and
 * address size, at most 0x20 naturally aligned elements that stay
 * within the page, and a fill value of 0 or 0x80.
1197 */
1198 if ( pDis->pCurInstr->opcode == OP_STOSWD
1199 && !pRegFrame->eflags.Bits.u1DF
1200 && pDis->opmode == pDis->mode
1201 && pDis->addrmode == pDis->mode)
1202 {
1203 bool fValidStosd = false;
1204
1205 if ( pDis->mode == CPUMODE_32BIT
1206 && pDis->prefix == PREFIX_REP
1207 && pRegFrame->ecx <= 0x20
1208 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1209 && !((uintptr_t)pvFault & 3)
1210 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1211 )
1212 {
1213 fValidStosd = true;
1214 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1215 }
1216 else
1217 if ( pDis->mode == CPUMODE_64BIT
1218 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1219 && pRegFrame->rcx <= 0x20
1220 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1221 && !((uintptr_t)pvFault & 7)
1222 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1223 )
1224 {
1225 fValidStosd = true;
1226 }
1227
1228 if (fValidStosd)
1229 {
1230 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1231 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1232 pgmUnlock(pVM);
1233 return rc;
1234 }
1235 }
1236
1237 /* REP prefix, don't bother. */
1238 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1239 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1240 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
     /* Both checks were already evaluated (negative) in the condition above,
      * so the dirty page test below does not have to repeat them. */
1241 fNotReusedNotForking = true;
1242 }
1243
1244#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1245 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1246 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1247 */
1248 if ( pPage->cModifications >= cMaxModifications
1249 && !fForcedFlush
1250 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1251 && ( fNotReusedNotForking
1252 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1253 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1254 )
1255 )
1256 {
1257 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1258 Assert(pPage->fDirty == false);
1259
1260 /* Flush any monitored duplicates as we will disable write protection. */
1261 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1262 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1263 {
1264 PPGMPOOLPAGE pPageHead = pPage;
1265
1266 /* Find the monitor head. */
1267 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1268 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1269
     /* Walk the chain from the head and flush every page except pPage.  The
      * next index is read before the flush call. */
1270 while (pPageHead)
1271 {
1272 unsigned idxNext = pPageHead->iMonitoredNext;
1273
1274 if (pPageHead != pPage)
1275 {
1276 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1277 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1278 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1279 AssertRC(rc2);
1280 }
1281
1282 if (idxNext == NIL_PGMPOOL_IDX)
1283 break;
1284
1285 pPageHead = &pPool->aPages[idxNext];
1286 }
1287 }
1288
1289 /* The flushing above might fail for locked pages, so double check. */
1290 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1291 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1292 {
1293 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1294
1295 /* Temporarily allow write access to the page table again. */
1296 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1297 if (rc == VINF_SUCCESS)
1298 {
1299 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1300 AssertMsg(rc == VINF_SUCCESS
1301 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1302 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1303 || rc == VERR_PAGE_NOT_PRESENT,
1304 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1305
1306 pPage->pvDirtyFault = pvFault;
1307
1308 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1309 pgmUnlock(pVM);
1310 return rc;
1311 }
     /* PGMHandlerPhysicalPageTempOff did not return VINF_SUCCESS: execution
      * continues with the flush path below. */
1312 }
1313 }
1314#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1315
1316 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1317flushPage:
1318 /*
1319 * Not worth it, so flush it.
1320 *
1321 * If we considered it to be reused, don't go back to ring-3
1322 * to emulate failed instructions since we usually cannot
1323 * interpret then. This may be a bit risky, in which case
1324 * the reuse detection must be fixed.
1325 */
1326 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1327 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1328 && fReused)
1329 {
1330 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1331 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1332 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1333 }
1334 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1335 pgmUnlock(pVM);
1336 return rc;
1337}
1338
1339# endif /* !IN_RING3 */
1340
1341# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1342
1343# ifdef VBOX_STRICT
1344/**
1345 * Check references to guest physical memory in a PAE / PAE page table.
1346 *
1347 * @param pPool The pool.
1348 * @param pPage The page.
1349 * @param pShwPT The shadow page table (mapping of the page).
1350 * @param pGstPT The guest page table.
1351 */
1352static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1353{
1354 unsigned cErrors = 0;
1355 int LastRc = -1; /* initialized to shut up gcc */
1356 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1357 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1358 PVM pVM = pPool->CTX_SUFF(pVM);
1359
1360#ifdef VBOX_STRICT
1361 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1362 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1363#endif
1364 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1365 {
1366 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1367 {
1368 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1369 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1370 if ( rc != VINF_SUCCESS
1371 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1372 {
1373 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1374 LastPTE = i;
1375 LastRc = rc;
1376 LastHCPhys = HCPhys;
1377 cErrors++;
1378
1379 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1380 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1381 AssertRC(rc);
1382
1383 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1384 {
1385 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1386
1387 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1388 {
1389 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1390
1391 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1392 {
1393 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1394 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1395 {
1396 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1397 }
1398 }
1399
1400 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1401 }
1402 }
1403 }
1404 }
1405 }
1406 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1407}
1408
1409/**
1410 * Check references to guest physical memory in a PAE / 32-bit page table.
1411 *
1412 * @param pPool The pool.
1413 * @param pPage The page.
1414 * @param pShwPT The shadow page table (mapping of the page).
1415 * @param pGstPT The guest page table.
1416 */
1417static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1418{
1419 unsigned cErrors = 0;
1420 int LastRc = -1; /* initialized to shut up gcc */
1421 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1422 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1423 PVM pVM = pPool->CTX_SUFF(pVM);
1424
1425#ifdef VBOX_STRICT
1426 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1427 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1428#endif
1429 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1430 {
1431 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1432 {
1433 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1434 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1435 if ( rc != VINF_SUCCESS
1436 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1437 {
1438 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1439 LastPTE = i;
1440 LastRc = rc;
1441 LastHCPhys = HCPhys;
1442 cErrors++;
1443
1444 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1445 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1446 AssertRC(rc);
1447
1448 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1449 {
1450 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1451
1452 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1453 {
1454 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1455
1456 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1457 {
1458 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1459 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1460 {
1461 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1462 }
1463 }
1464
1465 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1466 }
1467 }
1468 }
1469 }
1470 }
1471 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1472}
1473
1474# endif /* VBOX_STRICT */
1475
1476/**
1477 * Clear references to guest physical memory in a PAE / PAE page table.
1478 *
1479 * @returns nr of changed PTEs
1480 * @param pPool The pool.
1481 * @param pPage The page.
1482 * @param pShwPT The shadow page table (mapping of the page).
1483 * @param pGstPT The guest page table.
1484 * @param pOldGstPT The old cached guest page table.
1485 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1486 * @param pfFlush Flush reused page table (out)
1487 */
1488DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1489 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1490{
1491 unsigned cChanged = 0;
1492
1493#ifdef VBOX_STRICT
1494 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1495 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1496#endif
1497 *pfFlush = false;
1498
1499 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1500 {
1501 /* Check the new value written by the guest. If present and with a bogus physical address, then
1502 * it's fairly safe to assume the guest is reusing the PT.
1503 */
1504 if ( fAllowRemoval
1505 && pGstPT->a[i].n.u1Present)
1506 {
1507 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1508 {
1509 *pfFlush = true;
1510 return ++cChanged;
1511 }
1512 }
1513 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1514 {
1515 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1516 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1517 {
1518#ifdef VBOX_STRICT
1519 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1520 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1521 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1522#endif
1523 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1524 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1525 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1526 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1527
1528 if ( uHostAttr == uGuestAttr
1529 && fHostRW <= fGuestRW)
1530 continue;
1531 }
1532 cChanged++;
1533 /* Something was changed, so flush it. */
1534 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1535 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1536 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1537 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1538 }
1539 }
1540 return cChanged;
1541}
1542
1543/**
1544 * Clear references to guest physical memory in a PAE / PAE page table.
1545 *
1546 * @returns nr of changed PTEs
1547 * @param pPool The pool.
1548 * @param pPage The page.
1549 * @param pShwPT The shadow page table (mapping of the page).
1550 * @param pGstPT The guest page table.
1551 * @param pOldGstPT The old cached guest page table.
1552 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1553 * @param pfFlush Flush reused page table (out)
1554 */
1555DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1556 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1557{
1558 unsigned cChanged = 0;
1559
1560#ifdef VBOX_STRICT
1561 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1562 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1563#endif
1564 *pfFlush = false;
1565
1566 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1567 {
1568 /* Check the new value written by the guest. If present and with a bogus physical address, then
1569 * it's fairly safe to assume the guest is reusing the PT.
1570 */
1571 if ( fAllowRemoval
1572 && pGstPT->a[i].n.u1Present)
1573 {
1574 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1575 {
1576 *pfFlush = true;
1577 return ++cChanged;
1578 }
1579 }
1580 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1581 {
1582 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1583 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1584 {
1585#ifdef VBOX_STRICT
1586 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1587 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1588 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1589#endif
1590 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1591 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1592 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1593 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1594
1595 if ( uHostAttr == uGuestAttr
1596 && fHostRW <= fGuestRW)
1597 continue;
1598 }
1599 cChanged++;
1600 /* Something was changed, so flush it. */
1601 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1602 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1603 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1604 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1605 }
1606 }
1607 return cChanged;
1608}
1609
1610/**
1611 * Flush a dirty page
1612 *
1613 * @param pVM VM Handle.
1614 * @param pPool The pool.
1615 * @param idxSlot Dirty array slot index
1616 * @param fAllowRemoval Allow a reused page table to be removed
1617 */
1618static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1619{
1620 PPGMPOOLPAGE pPage;
1621 unsigned idxPage;
1622
1623 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1624 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1625 return;
1626
1627 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1628 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1629 pPage = &pPool->aPages[idxPage];
1630 Assert(pPage->idx == idxPage);
1631 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1632
1633 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1634 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1635
1636#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1637 PVMCPU pVCpu = VMMGetCpu(pVM);
1638 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1639#endif
1640
1641 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1642 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1643 Assert(rc == VINF_SUCCESS);
1644 pPage->fDirty = false;
1645
1646#ifdef VBOX_STRICT
1647 uint64_t fFlags = 0;
1648 RTHCPHYS HCPhys;
1649 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1650 AssertMsg( ( rc == VINF_SUCCESS
1651 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1652 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1653 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1654 || rc == VERR_PAGE_NOT_PRESENT,
1655 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1656#endif
1657
1658 /* Flush those PTEs that have changed. */
1659 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1660 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1661 void *pvGst;
1662 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1663 bool fFlush;
1664 unsigned cChanges;
1665
1666 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1667 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1668 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1669 else
1670 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1671 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1672
1673 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1674 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1675 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1676 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1677
1678 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1679 Assert(pPage->cModifications);
1680 if (cChanges < 4)
1681 pPage->cModifications = 1; /* must use > 0 here */
1682 else
1683 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1684
1685 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1686 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1687 pPool->idxFreeDirtyPage = idxSlot;
1688
1689 pPool->cDirtyPages--;
1690 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1691 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1692 if (fFlush)
1693 {
1694 Assert(fAllowRemoval);
1695 Log(("Flush reused page table!\n"));
1696 pgmPoolFlushPage(pPool, pPage);
1697 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1698 }
1699 else
1700 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1701
1702#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1703 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1704#endif
1705}
1706
1707# ifndef IN_RING3
1708/**
1709 * Add a new dirty page
1710 *
1711 * @param pVM VM Handle.
1712 * @param pPool The pool.
1713 * @param pPage The page.
1714 */
1715void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1716{
1717 unsigned idxFree;
1718
1719 Assert(PGMIsLocked(pVM));
1720 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1721 Assert(!pPage->fDirty);
1722
1723 idxFree = pPool->idxFreeDirtyPage;
1724 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1725 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1726
1727 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1728 {
1729 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1730 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1731 }
1732 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1733 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1734
1735 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1736
1737 /*
1738 * Make a copy of the guest page table as we require valid GCPhys addresses
1739 * when removing references to physical pages.
1740 * (The HCPhys linear lookup is *extremely* expensive!)
1741 */
1742 void *pvGst;
1743 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1744 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1745#ifdef VBOX_STRICT
1746 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1747 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1748 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1749 else
1750 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1751 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1752#endif
1753 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1754
1755 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1756 pPage->fDirty = true;
1757 pPage->idxDirty = idxFree;
1758 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1759 pPool->cDirtyPages++;
1760
1761 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1762 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1763 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1764 {
1765 unsigned i;
1766 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1767 {
1768 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1769 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1770 {
1771 pPool->idxFreeDirtyPage = idxFree;
1772 break;
1773 }
1774 }
1775 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1776 }
1777
1778 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1779 return;
1780}
1781# endif /* !IN_RING3 */
1782
1783/**
1784 * Check if the specified page is dirty (not write monitored)
1785 *
1786 * @return dirty or not
1787 * @param pVM VM Handle.
1788 * @param GCPhys Guest physical address
1789 */
1790bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1791{
1792 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1793 Assert(PGMIsLocked(pVM));
1794 if (!pPool->cDirtyPages)
1795 return false;
1796
1797 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1798
1799 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1800 {
1801 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1802 {
1803 PPGMPOOLPAGE pPage;
1804 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1805
1806 pPage = &pPool->aPages[idxPage];
1807 if (pPage->GCPhys == GCPhys)
1808 return true;
1809 }
1810 }
1811 return false;
1812}
1813
1814/**
1815 * Reset all dirty pages by reinstating page monitoring.
1816 *
1817 * @param pVM VM Handle.
1818 */
1819void pgmPoolResetDirtyPages(PVM pVM)
1820{
1821 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1822 Assert(PGMIsLocked(pVM));
1823 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1824
1825 if (!pPool->cDirtyPages)
1826 return;
1827
1828 Log(("pgmPoolResetDirtyPages\n"));
1829 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1830 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1831
1832 pPool->idxFreeDirtyPage = 0;
1833 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1834 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1835 {
1836 unsigned i;
1837 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1838 {
1839 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1840 {
1841 pPool->idxFreeDirtyPage = i;
1842 break;
1843 }
1844 }
1845 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1846 }
1847
1848 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1849 return;
1850}
1851
1852/**
1853 * Invalidate the PT entry for the specified page
1854 *
1855 * @param pVM VM Handle.
1856 * @param GCPtrPage Guest page to invalidate
1857 */
1858void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
1859{
1860 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1861 Assert(PGMIsLocked(pVM));
1862 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1863
1864 if (!pPool->cDirtyPages)
1865 return;
1866
1867 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage));
1868 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1869 {
1870 }
1871}
1872
1873/**
1874 * Reset all dirty pages by reinstating page monitoring.
1875 *
1876 * @param pVM VM Handle.
1877 * @param GCPhysPT Physical address of the page table
1878 */
1879void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1880{
1881 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1882 Assert(PGMIsLocked(pVM));
1883 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1884 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1885
1886 if (!pPool->cDirtyPages)
1887 return;
1888
1889 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1890
1891 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1892 {
1893 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1894 {
1895 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1896
1897 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1898 if (pPage->GCPhys == GCPhysPT)
1899 {
1900 idxDirtyPage = i;
1901 break;
1902 }
1903 }
1904 }
1905
1906 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1907 {
1908 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1909 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1910 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1911 {
1912 unsigned i;
1913 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1914 {
1915 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
1916 {
1917 pPool->idxFreeDirtyPage = i;
1918 break;
1919 }
1920 }
1921 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1922 }
1923 }
1924}
1925
1926# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1927
1928/**
1929 * Inserts a page into the GCPhys hash table.
1930 *
1931 * @param pPool The pool.
1932 * @param pPage The page.
1933 */
1934DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1935{
1936 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1937 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1938 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1939 pPage->iNext = pPool->aiHash[iHash];
1940 pPool->aiHash[iHash] = pPage->idx;
1941}
1942
1943
1944/**
1945 * Removes a page from the GCPhys hash table.
1946 *
1947 * @param pPool The pool.
1948 * @param pPage The page.
1949 */
1950DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1951{
1952 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1953 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1954 if (pPool->aiHash[iHash] == pPage->idx)
1955 pPool->aiHash[iHash] = pPage->iNext;
1956 else
1957 {
1958 uint16_t iPrev = pPool->aiHash[iHash];
1959 for (;;)
1960 {
1961 const int16_t i = pPool->aPages[iPrev].iNext;
1962 if (i == pPage->idx)
1963 {
1964 pPool->aPages[iPrev].iNext = pPage->iNext;
1965 break;
1966 }
1967 if (i == NIL_PGMPOOL_IDX)
1968 {
1969 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1970 break;
1971 }
1972 iPrev = i;
1973 }
1974 }
1975 pPage->iNext = NIL_PGMPOOL_IDX;
1976}
1977
1978
1979/**
1980 * Frees up one cache page.
1981 *
1982 * @returns VBox status code.
1983 * @retval VINF_SUCCESS on success.
1984 * @param pPool The pool.
1985 * @param iUser The user index.
1986 */
1987static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1988{
1989#ifndef IN_RC
1990 const PVM pVM = pPool->CTX_SUFF(pVM);
1991#endif
1992 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1993 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1994
1995 /*
1996 * Select one page from the tail of the age list.
1997 */
1998 PPGMPOOLPAGE pPage;
1999 for (unsigned iLoop = 0; ; iLoop++)
2000 {
2001 uint16_t iToFree = pPool->iAgeTail;
2002 if (iToFree == iUser)
2003 iToFree = pPool->aPages[iToFree].iAgePrev;
2004/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2005 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2006 {
2007 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2008 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2009 {
2010 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2011 continue;
2012 iToFree = i;
2013 break;
2014 }
2015 }
2016*/
2017 Assert(iToFree != iUser);
2018 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2019 pPage = &pPool->aPages[iToFree];
2020
2021 /*
2022 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2023 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2024 */
2025 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
2026 break;
2027 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2028 pgmPoolCacheUsed(pPool, pPage);
2029 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
2030 }
2031
2032 /*
2033 * Found a usable page, flush it and return.
2034 */
2035 int rc = pgmPoolFlushPage(pPool, pPage);
2036 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2037 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2038 if (rc == VINF_SUCCESS)
2039 PGM_INVL_ALL_VCPU_TLBS(pVM);
2040 return rc;
2041}
2042
2043
2044/**
2045 * Checks if a kind mismatch is really a page being reused
2046 * or if it's just normal remappings.
2047 *
2048 * @returns true if reused and the cached page (enmKind1) should be flushed
2049 * @returns false if not reused.
2050 * @param enmKind1 The kind of the cached page.
2051 * @param enmKind2 The kind of the requested page.
2052 */
2053static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2054{
2055 switch (enmKind1)
2056 {
2057 /*
2058 * Never reuse them. There is no remapping in non-paging mode.
2059 */
2060 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2061 case PGMPOOLKIND_32BIT_PD_PHYS:
2062 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2063 case PGMPOOLKIND_PAE_PD_PHYS:
2064 case PGMPOOLKIND_PAE_PDPT_PHYS:
2065 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2066 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2067 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2068 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2069 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2070 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2071 return false;
2072
2073 /*
2074 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2075 */
2076 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2077 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2078 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2079 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2080 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2081 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2082 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2083 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2084 case PGMPOOLKIND_32BIT_PD:
2085 case PGMPOOLKIND_PAE_PDPT:
2086 switch (enmKind2)
2087 {
2088 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2089 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2090 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2091 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2092 case PGMPOOLKIND_64BIT_PML4:
2093 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2094 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2095 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2096 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2097 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2098 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2099 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2100 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2101 return true;
2102 default:
2103 return false;
2104 }
2105
2106 /*
2107 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2108 */
2109 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2110 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2111 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2112 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2113 case PGMPOOLKIND_64BIT_PML4:
2114 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2115 switch (enmKind2)
2116 {
2117 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2119 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2120 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2121 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2122 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2123 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2124 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2125 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2126 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2127 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2128 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2129 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2130 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2131 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2132 return true;
2133 default:
2134 return false;
2135 }
2136
2137 /*
2138 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2139 */
2140 case PGMPOOLKIND_ROOT_NESTED:
2141 return false;
2142
2143 default:
2144 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2145 }
2146}
2147
2148
2149/**
2150 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2151 *
2152 * @returns VBox status code.
2153 * @retval VINF_PGM_CACHED_PAGE on success.
2154 * @retval VERR_FILE_NOT_FOUND if not found.
2155 * @param pPool The pool.
2156 * @param GCPhys The GC physical address of the page we're gonna shadow.
2157 * @param enmKind The kind of mapping.
2158 * @param enmAccess Access type for the mapping (only relevant for big pages)
2159 * @param iUser The shadow page pool index of the user table.
2160 * @param iUserTable The index into the user table (shadowed).
2161 * @param ppPage Where to store the pointer to the page.
2162 */
2163static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2164{
2165#ifndef IN_RC
2166 const PVM pVM = pPool->CTX_SUFF(pVM);
2167#endif
2168 /*
2169 * Look up the GCPhys in the hash.
2170 */
2171 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2172 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2173 if (i != NIL_PGMPOOL_IDX)
2174 {
2175 do
2176 {
2177 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2178 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2179 if (pPage->GCPhys == GCPhys)
2180 {
2181 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2182 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2183 {
2184 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2185 * doesn't flush it in case there are no more free use records.
2186 */
2187 pgmPoolCacheUsed(pPool, pPage);
2188
2189 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2190 if (RT_SUCCESS(rc))
2191 {
2192 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2193 *ppPage = pPage;
2194 if (pPage->cModifications)
2195 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2196 STAM_COUNTER_INC(&pPool->StatCacheHits);
2197 return VINF_PGM_CACHED_PAGE;
2198 }
2199 return rc;
2200 }
2201
2202 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2203 {
2204 /*
2205 * The kind is different. In some cases we should now flush the page
2206 * as it has been reused, but in most cases this is normal remapping
2207 * of PDs as PT or big pages using the GCPhys field in a slightly
2208 * different way than the other kinds.
2209 */
2210 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2211 {
2212 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2213 pgmPoolFlushPage(pPool, pPage);
2214 break;
2215 }
2216 }
2217 }
2218
2219 /* next */
2220 i = pPage->iNext;
2221 } while (i != NIL_PGMPOOL_IDX);
2222 }
2223
2224 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2225 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2226 return VERR_FILE_NOT_FOUND;
2227}
2228
2229
2230/**
2231 * Inserts a page into the cache.
2232 *
2233 * @param pPool The pool.
2234 * @param pPage The cached page.
2235 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2236 */
2237static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2238{
2239 /*
2240 * Insert into the GCPhys hash if the page is fit for that.
2241 */
2242 Assert(!pPage->fCached);
2243 if (fCanBeCached)
2244 {
2245 pPage->fCached = true;
2246 pgmPoolHashInsert(pPool, pPage);
2247 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2248 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2249 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2250 }
2251 else
2252 {
2253 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2254 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2255 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2256 }
2257
2258 /*
2259 * Insert at the head of the age list.
2260 */
2261 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2262 pPage->iAgeNext = pPool->iAgeHead;
2263 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2264 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2265 else
2266 pPool->iAgeTail = pPage->idx;
2267 pPool->iAgeHead = pPage->idx;
2268}
2269
2270
2271/**
2272 * Flushes a cached page.
2273 *
2274 * @param pPool The pool.
2275 * @param pPage The cached page.
2276 */
2277static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2278{
2279 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2280
2281 /*
2282 * Remove the page from the hash.
2283 */
2284 if (pPage->fCached)
2285 {
2286 pPage->fCached = false;
2287 pgmPoolHashRemove(pPool, pPage);
2288 }
2289 else
2290 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2291
2292 /*
2293 * Remove it from the age list.
2294 */
2295 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2296 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2297 else
2298 pPool->iAgeTail = pPage->iAgePrev;
2299 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2300 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2301 else
2302 pPool->iAgeHead = pPage->iAgeNext;
2303 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2304 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2305}
2306
2307
2308/**
2309 * Looks for pages sharing the monitor.
2310 *
2311 * @returns Pointer to the head page.
2312 * @returns NULL if not found.
2313 * @param pPool The Pool
2314 * @param pNewPage The page which is going to be monitored.
2315 */
2316static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2317{
2318 /*
2319 * Look up the GCPhys in the hash.
2320 */
2321 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2322 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2323 if (i == NIL_PGMPOOL_IDX)
2324 return NULL;
2325 do
2326 {
2327 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2328 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2329 && pPage != pNewPage)
2330 {
2331 switch (pPage->enmKind)
2332 {
2333 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2334 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2335 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2336 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2337 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2338 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2339 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2340 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2341 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2342 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2343 case PGMPOOLKIND_64BIT_PML4:
2344 case PGMPOOLKIND_32BIT_PD:
2345 case PGMPOOLKIND_PAE_PDPT:
2346 {
2347 /* find the head */
2348 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2349 {
2350 Assert(pPage->iMonitoredPrev != pPage->idx);
2351 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2352 }
2353 return pPage;
2354 }
2355
2356 /* ignore, no monitoring. */
2357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2358 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2359 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2360 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2361 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2362 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2363 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2364 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2365 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2366 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2367 case PGMPOOLKIND_ROOT_NESTED:
2368 case PGMPOOLKIND_PAE_PD_PHYS:
2369 case PGMPOOLKIND_PAE_PDPT_PHYS:
2370 case PGMPOOLKIND_32BIT_PD_PHYS:
2371 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2372 break;
2373 default:
2374 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2375 }
2376 }
2377
2378 /* next */
2379 i = pPage->iNext;
2380 } while (i != NIL_PGMPOOL_IDX);
2381 return NULL;
2382}
2383
2384
2385/**
2386 * Enabled write monitoring of a guest page.
2387 *
2388 * @returns VBox status code.
2389 * @retval VINF_SUCCESS on success.
2390 * @param pPool The pool.
2391 * @param pPage The cached page.
2392 */
2393static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2394{
2395 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2396
2397 /*
2398 * Filter out the relevant kinds.
2399 */
2400 switch (pPage->enmKind)
2401 {
2402 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2403 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2404 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2405 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2406 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2407 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2408 case PGMPOOLKIND_64BIT_PML4:
2409 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2410 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2411 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2412 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2413 case PGMPOOLKIND_32BIT_PD:
2414 case PGMPOOLKIND_PAE_PDPT:
2415 break;
2416
2417 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2418 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2419 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2420 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2421 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2424 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2425 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2426 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2427 case PGMPOOLKIND_ROOT_NESTED:
2428 /* Nothing to monitor here. */
2429 return VINF_SUCCESS;
2430
2431 case PGMPOOLKIND_32BIT_PD_PHYS:
2432 case PGMPOOLKIND_PAE_PDPT_PHYS:
2433 case PGMPOOLKIND_PAE_PD_PHYS:
2434 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2435 /* Nothing to monitor here. */
2436 return VINF_SUCCESS;
2437 default:
2438 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2439 }
2440
2441 /*
2442 * Install handler.
2443 */
2444 int rc;
2445 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2446 if (pPageHead)
2447 {
2448 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2449 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2450
2451#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2452 if (pPageHead->fDirty)
2453 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2454#endif
2455
2456 pPage->iMonitoredPrev = pPageHead->idx;
2457 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2458 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2459 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2460 pPageHead->iMonitoredNext = pPage->idx;
2461 rc = VINF_SUCCESS;
2462 }
2463 else
2464 {
2465 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2466 PVM pVM = pPool->CTX_SUFF(pVM);
2467 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2468 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2469 GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK,
2470 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2471 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2472 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2473 pPool->pszAccessHandler);
2474 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2475 * the heap size should suffice. */
2476 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2477 PVMCPU pVCpu = VMMGetCpu(pVM);
2478 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2479 }
2480 pPage->fMonitored = true;
2481 return rc;
2482}
2483
2484
2485/**
2486 * Disables write monitoring of a guest page.
2487 *
2488 * @returns VBox status code.
2489 * @retval VINF_SUCCESS on success.
2490 * @param pPool The pool.
2491 * @param pPage The cached page.
2492 */
2493static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2494{
2495 /*
2496 * Filter out the relevant kinds.
2497 */
2498 switch (pPage->enmKind)
2499 {
2500 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2501 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2502 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2503 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2504 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2505 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2506 case PGMPOOLKIND_64BIT_PML4:
2507 case PGMPOOLKIND_32BIT_PD:
2508 case PGMPOOLKIND_PAE_PDPT:
2509 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2510 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2511 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2512 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2513 break;
2514
2515 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2516 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2517 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2518 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2519 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2520 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2521 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2522 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2523 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2524 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2525 case PGMPOOLKIND_ROOT_NESTED:
2526 case PGMPOOLKIND_PAE_PD_PHYS:
2527 case PGMPOOLKIND_PAE_PDPT_PHYS:
2528 case PGMPOOLKIND_32BIT_PD_PHYS:
2529 /* Nothing to monitor here. */
2530 Assert(!pPage->fMonitored);
2531 return VINF_SUCCESS;
2532
2533 default:
2534 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2535 }
2536 Assert(pPage->fMonitored);
2537
2538 /*
2539 * Remove the page from the monitored list or uninstall it if last.
2540 */
2541 const PVM pVM = pPool->CTX_SUFF(pVM);
2542 int rc;
2543 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2544 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2545 {
2546 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2547 {
2548 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2549 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2550 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
2551 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2552 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2553 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2554 pPool->pszAccessHandler);
2555 AssertFatalRCSuccess(rc);
2556 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2557 }
2558 else
2559 {
2560 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2561 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2562 {
2563 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2564 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2565 }
2566 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2567 rc = VINF_SUCCESS;
2568 }
2569 }
2570 else
2571 {
2572 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2573 AssertFatalRC(rc);
2574 PVMCPU pVCpu = VMMGetCpu(pVM);
2575 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2576 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2577 }
2578 pPage->fMonitored = false;
2579
2580 /*
2581 * Remove it from the list of modified pages (if in it).
2582 */
2583 pgmPoolMonitorModifiedRemove(pPool, pPage);
2584
2585 return rc;
2586}
2587
2588
2589/**
2590 * Inserts the page into the list of modified pages.
2591 *
2592 * @param pPool The pool.
2593 * @param pPage The page.
2594 */
2595void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2596{
2597 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2598 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2599 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2600 && pPool->iModifiedHead != pPage->idx,
2601 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2602 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2603 pPool->iModifiedHead, pPool->cModifiedPages));
2604
2605 pPage->iModifiedNext = pPool->iModifiedHead;
2606 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2607 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2608 pPool->iModifiedHead = pPage->idx;
2609 pPool->cModifiedPages++;
2610#ifdef VBOX_WITH_STATISTICS
2611 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2612 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2613#endif
2614}
2615
2616
2617/**
2618 * Removes the page from the list of modified pages and resets the
2619 * moficiation counter.
2620 *
2621 * @param pPool The pool.
2622 * @param pPage The page which is believed to be in the list of modified pages.
2623 */
2624static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2625{
2626 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2627 if (pPool->iModifiedHead == pPage->idx)
2628 {
2629 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2630 pPool->iModifiedHead = pPage->iModifiedNext;
2631 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2632 {
2633 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2634 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2635 }
2636 pPool->cModifiedPages--;
2637 }
2638 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2639 {
2640 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2641 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2642 {
2643 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2644 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2645 }
2646 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2647 pPool->cModifiedPages--;
2648 }
2649 else
2650 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2651 pPage->cModifications = 0;
2652}
2653
2654
2655/**
2656 * Zaps the list of modified pages, resetting their modification counters in the process.
2657 *
2658 * @param pVM The VM handle.
2659 */
2660static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2661{
2662 pgmLock(pVM);
2663 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2664 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2665
2666 unsigned cPages = 0; NOREF(cPages);
2667
2668#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2669 pgmPoolResetDirtyPages(pVM);
2670#endif
2671
2672 uint16_t idx = pPool->iModifiedHead;
2673 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2674 while (idx != NIL_PGMPOOL_IDX)
2675 {
2676 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2677 idx = pPage->iModifiedNext;
2678 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2679 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2680 pPage->cModifications = 0;
2681 Assert(++cPages);
2682 }
2683 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2684 pPool->cModifiedPages = 0;
2685 pgmUnlock(pVM);
2686}
2687
2688
2689/**
2690 * Handle SyncCR3 pool tasks
2691 *
2692 * @returns VBox status code.
2693 * @retval VINF_SUCCESS if successfully added.
2694 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2695 * @param pVCpu The VMCPU handle.
2696 * @remark Should only be used when monitoring is available, thus placed in
2697 * the PGMPOOL_WITH_MONITORING #ifdef.
2698 */
2699int pgmPoolSyncCR3(PVMCPU pVCpu)
2700{
2701 PVM pVM = pVCpu->CTX_SUFF(pVM);
2702 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2703
2704 /*
2705 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2706 * Occasionally we will have to clear all the shadow page tables because we wanted
2707 * to monitor a page which was mapped by too many shadowed page tables. This operation
2708 * sometimes refered to as a 'lightweight flush'.
2709 */
2710# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2711 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2712 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2713# else /* !IN_RING3 */
2714 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2715 {
2716 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2717 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2718
2719 /* Make sure all other VCPUs return to ring 3. */
2720 if (pVM->cCpus > 1)
2721 {
2722 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2723 PGM_INVL_ALL_VCPU_TLBS(pVM);
2724 }
2725 return VINF_PGM_SYNC_CR3;
2726 }
2727# endif /* !IN_RING3 */
2728 else
2729 {
2730 pgmPoolMonitorModifiedClearAll(pVM);
2731
2732 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2733 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2734 {
2735 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2736 return pgmPoolSyncCR3(pVCpu);
2737 }
2738 }
2739 return VINF_SUCCESS;
2740}
2741
2742
2743/**
2744 * Frees up at least one user entry.
2745 *
2746 * @returns VBox status code.
2747 * @retval VINF_SUCCESS if successfully added.
2748 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2749 * @param pPool The pool.
2750 * @param iUser The user index.
2751 */
2752static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2753{
2754 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2755 /*
2756 * Just free cached pages in a braindead fashion.
2757 */
2758 /** @todo walk the age list backwards and free the first with usage. */
2759 int rc = VINF_SUCCESS;
2760 do
2761 {
2762 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2763 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2764 rc = rc2;
2765 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2766 return rc;
2767}
2768
2769
2770/**
2771 * Inserts a page into the cache.
2772 *
2773 * This will create user node for the page, insert it into the GCPhys
2774 * hash, and insert it into the age list.
2775 *
2776 * @returns VBox status code.
2777 * @retval VINF_SUCCESS if successfully added.
2778 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2779 * @param pPool The pool.
2780 * @param pPage The cached page.
2781 * @param GCPhys The GC physical address of the page we're gonna shadow.
2782 * @param iUser The user index.
2783 * @param iUserTable The user table index.
2784 */
2785DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2786{
2787 int rc = VINF_SUCCESS;
2788 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2789
2790 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable));
2791
2792#ifdef VBOX_STRICT
2793 /*
2794 * Check that the entry doesn't already exists.
2795 */
2796 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2797 {
2798 uint16_t i = pPage->iUserHead;
2799 do
2800 {
2801 Assert(i < pPool->cMaxUsers);
2802 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2803 i = paUsers[i].iNext;
2804 } while (i != NIL_PGMPOOL_USER_INDEX);
2805 }
2806#endif
2807
2808 /*
2809 * Find free a user node.
2810 */
2811 uint16_t i = pPool->iUserFreeHead;
2812 if (i == NIL_PGMPOOL_USER_INDEX)
2813 {
2814 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2815 if (RT_FAILURE(rc))
2816 return rc;
2817 i = pPool->iUserFreeHead;
2818 }
2819
2820 /*
2821 * Unlink the user node from the free list,
2822 * initialize and insert it into the user list.
2823 */
2824 pPool->iUserFreeHead = paUsers[i].iNext;
2825 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2826 paUsers[i].iUser = iUser;
2827 paUsers[i].iUserTable = iUserTable;
2828 pPage->iUserHead = i;
2829
2830 /*
2831 * Insert into cache and enable monitoring of the guest page if enabled.
2832 *
2833 * Until we implement caching of all levels, including the CR3 one, we'll
2834 * have to make sure we don't try monitor & cache any recursive reuse of
2835 * a monitored CR3 page. Because all windows versions are doing this we'll
2836 * have to be able to do combined access monitoring, CR3 + PT and
2837 * PD + PT (guest PAE).
2838 *
2839 * Update:
2840 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2841 */
2842 const bool fCanBeMonitored = true;
2843 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2844 if (fCanBeMonitored)
2845 {
2846 rc = pgmPoolMonitorInsert(pPool, pPage);
2847 AssertRC(rc);
2848 }
2849 return rc;
2850}
2851
2852
2853/**
2854 * Adds a user reference to a page.
2855 *
2856 * This will move the page to the head of the
2857 *
2858 * @returns VBox status code.
2859 * @retval VINF_SUCCESS if successfully added.
2860 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2861 * @param pPool The pool.
2862 * @param pPage The cached page.
2863 * @param iUser The user index.
2864 * @param iUserTable The user table.
2865 */
2866static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2867{
2868 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2869
2870 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2871
2872# ifdef VBOX_STRICT
2873 /*
2874 * Check that the entry doesn't already exists. We only allow multiple
2875 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2876 */
2877 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2878 {
2879 uint16_t i = pPage->iUserHead;
2880 do
2881 {
2882 Assert(i < pPool->cMaxUsers);
2883 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2884 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2885 i = paUsers[i].iNext;
2886 } while (i != NIL_PGMPOOL_USER_INDEX);
2887 }
2888# endif
2889
2890 /*
2891 * Allocate a user node.
2892 */
2893 uint16_t i = pPool->iUserFreeHead;
2894 if (i == NIL_PGMPOOL_USER_INDEX)
2895 {
2896 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2897 if (RT_FAILURE(rc))
2898 return rc;
2899 i = pPool->iUserFreeHead;
2900 }
2901 pPool->iUserFreeHead = paUsers[i].iNext;
2902
2903 /*
2904 * Initialize the user node and insert it.
2905 */
2906 paUsers[i].iNext = pPage->iUserHead;
2907 paUsers[i].iUser = iUser;
2908 paUsers[i].iUserTable = iUserTable;
2909 pPage->iUserHead = i;
2910
2911# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2912 if (pPage->fDirty)
2913 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2914# endif
2915
2916 /*
2917 * Tell the cache to update its replacement stats for this page.
2918 */
2919 pgmPoolCacheUsed(pPool, pPage);
2920 return VINF_SUCCESS;
2921}
2922
2923
2924/**
2925 * Frees a user record associated with a page.
2926 *
2927 * This does not clear the entry in the user table, it simply replaces the
2928 * user record to the chain of free records.
2929 *
2930 * @param pPool The pool.
2931 * @param HCPhys The HC physical address of the shadow page.
2932 * @param iUser The shadow page pool index of the user table.
2933 * @param iUserTable The index into the user table (shadowed).
2934 */
2935static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2936{
2937 /*
2938 * Unlink and free the specified user entry.
2939 */
2940 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2941
2942 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2943 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2944 uint16_t i = pPage->iUserHead;
2945 if ( i != NIL_PGMPOOL_USER_INDEX
2946 && paUsers[i].iUser == iUser
2947 && paUsers[i].iUserTable == iUserTable)
2948 {
2949 pPage->iUserHead = paUsers[i].iNext;
2950
2951 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2952 paUsers[i].iNext = pPool->iUserFreeHead;
2953 pPool->iUserFreeHead = i;
2954 return;
2955 }
2956
2957 /* General: Linear search. */
2958 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2959 while (i != NIL_PGMPOOL_USER_INDEX)
2960 {
2961 if ( paUsers[i].iUser == iUser
2962 && paUsers[i].iUserTable == iUserTable)
2963 {
2964 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2965 paUsers[iPrev].iNext = paUsers[i].iNext;
2966 else
2967 pPage->iUserHead = paUsers[i].iNext;
2968
2969 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2970 paUsers[i].iNext = pPool->iUserFreeHead;
2971 pPool->iUserFreeHead = i;
2972 return;
2973 }
2974 iPrev = i;
2975 i = paUsers[i].iNext;
2976 }
2977
2978 /* Fatal: didn't find it */
2979 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
2980 iUser, iUserTable, pPage->GCPhys));
2981}
2982
2983
2984/**
2985 * Gets the entry size of a shadow table.
2986 *
2987 * @param enmKind The kind of page.
2988 *
2989 * @returns The size of the entry in bytes. That is, 4 or 8.
2990 * @returns If the kind is not for a table, an assertion is raised and 0 is
2991 * returned.
2992 */
2993DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2994{
2995 switch (enmKind)
2996 {
2997 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2998 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2999 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3000 case PGMPOOLKIND_32BIT_PD:
3001 case PGMPOOLKIND_32BIT_PD_PHYS:
3002 return 4;
3003
3004 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3005 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3006 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3007 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3008 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3009 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3010 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3011 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3012 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3013 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3014 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3015 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3016 case PGMPOOLKIND_64BIT_PML4:
3017 case PGMPOOLKIND_PAE_PDPT:
3018 case PGMPOOLKIND_ROOT_NESTED:
3019 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3020 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3021 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3022 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3023 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3024 case PGMPOOLKIND_PAE_PD_PHYS:
3025 case PGMPOOLKIND_PAE_PDPT_PHYS:
3026 return 8;
3027
3028 default:
3029 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3030 }
3031}
3032
3033
3034/**
3035 * Gets the entry size of a guest table.
3036 *
3037 * @param enmKind The kind of page.
3038 *
3039 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3040 * @returns If the kind is not for a table, an assertion is raised and 0 is
3041 * returned.
3042 */
3043DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3044{
3045 switch (enmKind)
3046 {
3047 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3048 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3049 case PGMPOOLKIND_32BIT_PD:
3050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3051 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3052 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3053 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3054 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3055 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3056 return 4;
3057
3058 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3059 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3060 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3061 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3062 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3063 case PGMPOOLKIND_64BIT_PML4:
3064 case PGMPOOLKIND_PAE_PDPT:
3065 return 8;
3066
3067 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3068 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3069 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3070 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3071 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3072 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3073 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3074 case PGMPOOLKIND_ROOT_NESTED:
3075 case PGMPOOLKIND_PAE_PD_PHYS:
3076 case PGMPOOLKIND_PAE_PDPT_PHYS:
3077 case PGMPOOLKIND_32BIT_PD_PHYS:
3078 /** @todo can we return 0? (nobody is calling this...) */
3079 AssertFailed();
3080 return 0;
3081
3082 default:
3083 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3084 }
3085}
3086
3087
3088/**
3089 * Checks one shadow page table entry for a mapping of a physical page.
3090 *
3091 * @returns true / false indicating removal of all relevant PTEs
3092 *
3093 * @param pVM The VM handle.
3094 * @param pPhysPage The guest page in question.
3095 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3096 * @param iShw The shadow page table.
3097 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3098 */
3099static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3100{
3101 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3102 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3103 bool fRet = false;
3104
3105 /*
3106 * Assert sanity.
3107 */
3108 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3109 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3110 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3111
3112 /*
3113 * Then, clear the actual mappings to the page in the shadow PT.
3114 */
3115 switch (pPage->enmKind)
3116 {
3117 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3118 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3119 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3120 {
3121 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3122 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3123 uint32_t u32AndMask = 0;
3124 uint32_t u32OrMask = 0;
3125
3126 if (!fFlushPTEs)
3127 {
3128 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3129 {
3130 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3131 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3132 u32OrMask = X86_PTE_RW;
3133 u32AndMask = UINT32_MAX;
3134 fRet = true;
3135 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3136 break;
3137
3138 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3139 u32OrMask = 0;
3140 u32AndMask = ~X86_PTE_RW;
3141 fRet = true;
3142 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3143 break;
3144 default:
3145 /* (shouldn't be here, will assert below) */
3146 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3147 break;
3148 }
3149 }
3150 else
3151 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3152
3153 /* Update the counter if we're removing references. */
3154 if (!u32AndMask)
3155 {
3156 Assert(pPage->cPresent );
3157 Assert(pPool->cPresent);
3158 pPage->cPresent--;
3159 pPool->cPresent--;
3160 }
3161
3162 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3163 {
3164 X86PTE Pte;
3165
3166 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3167 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3168 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3169 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3170
3171 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3172 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3173 return fRet;
3174 }
3175#ifdef LOG_ENABLED
3176 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3177 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3178 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3179 {
3180 Log(("i=%d cFound=%d\n", i, ++cFound));
3181 }
3182#endif
3183 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3184 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3185 break;
3186 }
3187
3188 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3189 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3190 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3191 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3192 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3193 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3194 {
3195 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3196 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3197 uint64_t u64OrMask = 0;
3198 uint64_t u64AndMask = 0;
3199
3200 if (!fFlushPTEs)
3201 {
3202 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3203 {
3204 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3205 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3206 u64OrMask = X86_PTE_RW;
3207 u64AndMask = UINT64_MAX;
3208 fRet = true;
3209 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3210 break;
3211
3212 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3213 u64OrMask = 0;
3214 u64AndMask = ~(uint64_t)X86_PTE_RW;
3215 fRet = true;
3216 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3217 break;
3218
3219 default:
3220 /* (shouldn't be here, will assert below) */
3221 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3222 break;
3223 }
3224 }
3225 else
3226 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3227
3228 /* Update the counter if we're removing references. */
3229 if (!u64AndMask)
3230 {
3231 Assert(pPage->cPresent);
3232 Assert(pPool->cPresent);
3233 pPage->cPresent--;
3234 pPool->cPresent--;
3235 }
3236
3237 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3238 {
3239 X86PTEPAE Pte;
3240
3241 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3242 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3243 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3244 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3245
3246 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3247 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3248 return fRet;
3249 }
3250#ifdef LOG_ENABLED
3251 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3252 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3253 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3254 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3255 Log(("i=%d cFound=%d\n", i, ++cFound));
3256#endif
3257 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3258 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3259 break;
3260 }
3261
3262#ifdef PGM_WITH_LARGE_PAGES
3263 /* Large page case only. */
3264 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3265 {
3266 Assert(pVM->pgm.s.fNestedPaging);
3267
3268 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3269 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3270
3271 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3272 {
3273 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3274 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3275 pPD->a[iPte].u = 0;
3276 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3277
3278 /* Update the counter as we're removing references. */
3279 Assert(pPage->cPresent);
3280 Assert(pPool->cPresent);
3281 pPage->cPresent--;
3282 pPool->cPresent--;
3283
3284 return fRet;
3285 }
3286# ifdef LOG_ENABLED
3287 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3288 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3289 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3290 Log(("i=%d cFound=%d\n", i, ++cFound));
3291# endif
3292 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3293 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3294 break;
3295 }
3296
3297 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3298 case PGMPOOLKIND_PAE_PD_PHYS:
3299 {
3300 Assert(pVM->pgm.s.fNestedPaging);
3301
3302 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3303 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3304
3305 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3306 {
3307 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3308 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3309 pPD->a[iPte].u = 0;
3310 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3311
3312 /* Update the counter as we're removing references. */
3313 Assert(pPage->cPresent);
3314 Assert(pPool->cPresent);
3315 pPage->cPresent--;
3316 pPool->cPresent--;
3317 return fRet;
3318 }
3319# ifdef LOG_ENABLED
3320 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3321 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3322 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3323 Log(("i=%d cFound=%d\n", i, ++cFound));
3324# endif
3325 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3326 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3327 break;
3328 }
3329#endif /* PGM_WITH_LARGE_PAGES */
3330
3331 default:
3332 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3333 }
3334 return fRet;
3335}
3336
3337
3338/**
3339 * Scans one shadow page table for mappings of a physical page.
3340 *
3341 * @param pVM The VM handle.
3342 * @param pPhysPage The guest page in question.
3343 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3344 * @param iShw The shadow page table.
3345 */
3346static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3347{
3348 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3349
3350 /* We should only come here with when there's only one reference to this physical page. */
3351 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3352
3353 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3354 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3355 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3356 if (!fKeptPTEs)
3357 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3358 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3359}
3360
3361
3362/**
3363 * Flushes a list of shadow page tables mapping the same physical page.
3364 *
3365 * @param pVM The VM handle.
3366 * @param pPhysPage The guest page in question.
3367 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3368 * @param iPhysExt The physical cross reference extent list to flush.
3369 */
3370static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3371{
3372 Assert(PGMIsLockOwner(pVM));
3373 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3374 bool fKeepList = false;
3375
3376 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3377 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3378
3379 const uint16_t iPhysExtStart = iPhysExt;
3380 PPGMPOOLPHYSEXT pPhysExt;
3381 do
3382 {
3383 Assert(iPhysExt < pPool->cMaxPhysExts);
3384 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3385 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3386 {
3387 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3388 {
3389 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3390 if (!fKeptPTEs)
3391 {
3392 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3393 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3394 }
3395 else
3396 fKeepList = true;
3397 }
3398 }
3399 /* next */
3400 iPhysExt = pPhysExt->iNext;
3401 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3402
3403 if (!fKeepList)
3404 {
3405 /* insert the list into the free list and clear the ram range entry. */
3406 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3407 pPool->iPhysExtFreeHead = iPhysExtStart;
3408 /* Invalidate the tracking data. */
3409 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3410 }
3411
3412 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3413}
3414
3415
3416/**
3417 * Flushes all shadow page table mappings of the given guest page.
3418 *
3419 * This is typically called when the host page backing the guest one has been
3420 * replaced or when the page protection was changed due to a guest access
3421 * caught by the monitoring.
3422 *
3423 * @returns VBox status code.
3424 * @retval VINF_SUCCESS if all references has been successfully cleared.
3425 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3426 * pool cleaning. FF and sync flags are set.
3427 *
3428 * @param pVM The VM handle.
3429 * @param GCPhysPage GC physical address of the page in question
3430 * @param pPhysPage The guest page in question.
3431 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3432 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3433 * flushed, it is NOT touched if this isn't necessary.
3434 * The caller MUST initialized this to @a false.
3435 */
3436int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3437{
3438 PVMCPU pVCpu = VMMGetCpu(pVM);
3439 pgmLock(pVM);
3440 int rc = VINF_SUCCESS;
3441
3442#ifdef PGM_WITH_LARGE_PAGES
3443 /* Is this page part of a large page? */
3444 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3445 {
3446 PPGMPAGE pPhysBase;
3447 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3448
3449 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3450
3451 /* Fetch the large page base. */
3452 if (GCPhysBase != GCPhysPage)
3453 {
3454 pPhysBase = pgmPhysGetPage(&pVM->pgm.s, GCPhysBase);
3455 AssertFatal(pPhysBase);
3456 }
3457 else
3458 pPhysBase = pPhysPage;
3459
3460 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3461
3462 if (PGM_PAGE_GET_PDE_TYPE(pPhysBase) == PGM_PAGE_PDE_TYPE_PDE)
3463 {
3464 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3465 PGM_PAGE_SET_PDE_TYPE(pPhysBase, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3466
3467 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3468 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pPhysBase, fFlushPTEs, pfFlushTLBs);
3469
3470 *pfFlushTLBs = true;
3471 pgmUnlock(pVM);
3472 return rc;
3473 }
3474 }
3475#else
3476 NOREF(GCPhysPage);
3477#endif /* PGM_WITH_LARGE_PAGES */
3478
3479 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3480 if (u16)
3481 {
3482 /*
3483 * The zero page is currently screwing up the tracking and we'll
3484 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3485 * is defined, zero pages won't normally be mapped. Some kind of solution
3486 * will be needed for this problem of course, but it will have to wait...
3487 */
3488 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3489 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3490 rc = VINF_PGM_GCPHYS_ALIASED;
3491 else
3492 {
3493# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3494 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3495 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3496 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3497# endif
3498
3499 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3500 {
3501 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3502 pgmPoolTrackFlushGCPhysPT(pVM,
3503 pPhysPage,
3504 fFlushPTEs,
3505 PGMPOOL_TD_GET_IDX(u16));
3506 }
3507 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3508 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3509 else
3510 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3511 *pfFlushTLBs = true;
3512
3513# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3514 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3515# endif
3516 }
3517 }
3518
3519 if (rc == VINF_PGM_GCPHYS_ALIASED)
3520 {
3521 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3522 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3523 rc = VINF_PGM_SYNC_CR3;
3524 }
3525 pgmUnlock(pVM);
3526 return rc;
3527}
3528
3529
3530/**
3531 * Scans all shadow page tables for mappings of a physical page.
3532 *
3533 * This may be slow, but it's most likely more efficient than cleaning
3534 * out the entire page pool / cache.
3535 *
3536 * @returns VBox status code.
3537 * @retval VINF_SUCCESS if all references has been successfully cleared.
3538 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3539 * a page pool cleaning.
3540 *
3541 * @param pVM The VM handle.
3542 * @param pPhysPage The guest page in question.
3543 */
3544int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3545{
3546 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3547 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3548 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3549 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3550
3551 /*
3552 * There is a limit to what makes sense.
3553 */
3554 if ( pPool->cPresent > 1024
3555 && pVM->cCpus == 1)
3556 {
3557 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3558 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3559 return VINF_PGM_GCPHYS_ALIASED;
3560 }
3561
3562 /*
3563 * Iterate all the pages until we've encountered all that in use.
3564 * This is simple but not quite optimal solution.
3565 */
3566 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3567 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3568 unsigned cLeft = pPool->cUsedPages;
3569 unsigned iPage = pPool->cCurPages;
3570 while (--iPage >= PGMPOOL_IDX_FIRST)
3571 {
3572 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3573 if ( pPage->GCPhys != NIL_RTGCPHYS
3574 && pPage->cPresent)
3575 {
3576 switch (pPage->enmKind)
3577 {
3578 /*
3579 * We only care about shadow page tables.
3580 */
3581 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3582 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3583 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3584 {
3585 unsigned cPresent = pPage->cPresent;
3586 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3587 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3588 if (pPT->a[i].n.u1Present)
3589 {
3590 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3591 {
3592 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3593 pPT->a[i].u = 0;
3594
3595 /* Update the counter as we're removing references. */
3596 Assert(pPage->cPresent);
3597 Assert(pPool->cPresent);
3598 pPage->cPresent--;
3599 pPool->cPresent--;
3600 }
3601 if (!--cPresent)
3602 break;
3603 }
3604 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3605 break;
3606 }
3607
3608 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3609 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3610 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3611 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3612 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3613 {
3614 unsigned cPresent = pPage->cPresent;
3615 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3616 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3617 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3618 {
3619 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3620 {
3621 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3622 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3623
3624 /* Update the counter as we're removing references. */
3625 Assert(pPage->cPresent);
3626 Assert(pPool->cPresent);
3627 pPage->cPresent--;
3628 pPool->cPresent--;
3629 }
3630 if (!--cPresent)
3631 break;
3632 }
3633 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3634 break;
3635 }
3636#ifndef IN_RC
3637 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3638 {
3639 unsigned cPresent = pPage->cPresent;
3640 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3641 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3642 if (pPT->a[i].n.u1Present)
3643 {
3644 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3645 {
3646 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3647 pPT->a[i].u = 0;
3648
3649 /* Update the counter as we're removing references. */
3650 Assert(pPage->cPresent);
3651 Assert(pPool->cPresent);
3652 pPage->cPresent--;
3653 pPool->cPresent--;
3654 }
3655 if (!--cPresent)
3656 break;
3657 }
3658 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3659 break;
3660 }
3661#endif
3662 }
3663 if (!--cLeft)
3664 break;
3665 }
3666 }
3667
3668 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3669 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3670
3671 /*
3672 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3673 */
3674 if (pPool->cPresent > 1024)
3675 {
3676 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3677 return VINF_PGM_GCPHYS_ALIASED;
3678 }
3679
3680 return VINF_SUCCESS;
3681}
3682
3683
3684/**
3685 * Clears the user entry in a user table.
3686 *
3687 * This is used to remove all references to a page when flushing it.
3688 */
3689static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3690{
3691 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3692 Assert(pUser->iUser < pPool->cCurPages);
3693 uint32_t iUserTable = pUser->iUserTable;
3694
3695 /*
3696 * Map the user page.
3697 */
3698 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3699 union
3700 {
3701 uint64_t *pau64;
3702 uint32_t *pau32;
3703 } u;
3704 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3705
3706 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3707
3708 /* Safety precaution in case we change the paging for other modes too in the future. */
3709 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3710
3711#ifdef VBOX_STRICT
3712 /*
3713 * Some sanity checks.
3714 */
3715 switch (pUserPage->enmKind)
3716 {
3717 case PGMPOOLKIND_32BIT_PD:
3718 case PGMPOOLKIND_32BIT_PD_PHYS:
3719 Assert(iUserTable < X86_PG_ENTRIES);
3720 break;
3721 case PGMPOOLKIND_PAE_PDPT:
3722 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3723 case PGMPOOLKIND_PAE_PDPT_PHYS:
3724 Assert(iUserTable < 4);
3725 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3726 break;
3727 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3728 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3729 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3730 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3731 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3732 case PGMPOOLKIND_PAE_PD_PHYS:
3733 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3734 break;
3735 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3736 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3737 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3738 break;
3739 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3740 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3741 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3742 break;
3743 case PGMPOOLKIND_64BIT_PML4:
3744 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3745 /* GCPhys >> PAGE_SHIFT is the index here */
3746 break;
3747 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3748 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3749 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3750 break;
3751
3752 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3753 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3754 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3755 break;
3756
3757 case PGMPOOLKIND_ROOT_NESTED:
3758 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3759 break;
3760
3761 default:
3762 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3763 break;
3764 }
3765#endif /* VBOX_STRICT */
3766
3767 /*
3768 * Clear the entry in the user page.
3769 */
3770 switch (pUserPage->enmKind)
3771 {
3772 /* 32-bit entries */
3773 case PGMPOOLKIND_32BIT_PD:
3774 case PGMPOOLKIND_32BIT_PD_PHYS:
3775 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3776 break;
3777
3778 /* 64-bit entries */
3779 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3780 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3781 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3782 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3783 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3784#ifdef IN_RC
3785 /*
3786 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3787 * PDPT entry; the CPU fetches them only during cr3 load, so any
3788 * non-present PDPT will continue to cause page faults.
3789 */
3790 ASMReloadCR3();
3791 /* no break */
3792#endif
3793 case PGMPOOLKIND_PAE_PD_PHYS:
3794 case PGMPOOLKIND_PAE_PDPT_PHYS:
3795 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3796 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3797 case PGMPOOLKIND_64BIT_PML4:
3798 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3799 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3800 case PGMPOOLKIND_PAE_PDPT:
3801 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3802 case PGMPOOLKIND_ROOT_NESTED:
3803 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3804 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3805 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3806 break;
3807
3808 default:
3809 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3810 }
3811 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3812}
3813
3814
3815/**
3816 * Clears all users of a page.
3817 */
3818static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3819{
3820 /*
3821 * Free all the user records.
3822 */
3823 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3824
3825 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3826 uint16_t i = pPage->iUserHead;
3827 while (i != NIL_PGMPOOL_USER_INDEX)
3828 {
3829 /* Clear enter in user table. */
3830 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3831
3832 /* Free it. */
3833 const uint16_t iNext = paUsers[i].iNext;
3834 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3835 paUsers[i].iNext = pPool->iUserFreeHead;
3836 pPool->iUserFreeHead = i;
3837
3838 /* Next. */
3839 i = iNext;
3840 }
3841 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3842}
3843
3844
3845/**
3846 * Allocates a new physical cross reference extent.
3847 *
3848 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3849 * @param pVM The VM handle.
3850 * @param piPhysExt Where to store the phys ext index.
3851 */
3852PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3853{
3854 Assert(PGMIsLockOwner(pVM));
3855 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3856 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3857 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3858 {
3859 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3860 return NULL;
3861 }
3862 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3863 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3864 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3865 *piPhysExt = iPhysExt;
3866 return pPhysExt;
3867}
3868
3869
3870/**
3871 * Frees a physical cross reference extent.
3872 *
3873 * @param pVM The VM handle.
3874 * @param iPhysExt The extent to free.
3875 */
3876void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3877{
3878 Assert(PGMIsLockOwner(pVM));
3879 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3880 Assert(iPhysExt < pPool->cMaxPhysExts);
3881 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3882 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3883 {
3884 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3885 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3886 }
3887 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3888 pPool->iPhysExtFreeHead = iPhysExt;
3889}
3890
3891
3892/**
3893 * Frees a physical cross reference extent.
3894 *
3895 * @param pVM The VM handle.
3896 * @param iPhysExt The extent to free.
3897 */
3898void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3899{
3900 Assert(PGMIsLockOwner(pVM));
3901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3902
3903 const uint16_t iPhysExtStart = iPhysExt;
3904 PPGMPOOLPHYSEXT pPhysExt;
3905 do
3906 {
3907 Assert(iPhysExt < pPool->cMaxPhysExts);
3908 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3909 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3910 {
3911 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3912 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3913 }
3914
3915 /* next */
3916 iPhysExt = pPhysExt->iNext;
3917 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3918
3919 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3920 pPool->iPhysExtFreeHead = iPhysExtStart;
3921}
3922
3923
3924/**
3925 * Insert a reference into a list of physical cross reference extents.
3926 *
3927 * @returns The new tracking data for PGMPAGE.
3928 *
3929 * @param pVM The VM handle.
3930 * @param iPhysExt The physical extent index of the list head.
3931 * @param iShwPT The shadow page table index.
3932 * @param iPte Page table entry
3933 *
3934 */
3935static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3936{
3937 Assert(PGMIsLockOwner(pVM));
3938 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3939 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3940
3941 /*
3942 * Special common cases.
3943 */
3944 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3945 {
3946 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3947 paPhysExts[iPhysExt].apte[1] = iPte;
3948 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3949 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3950 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3951 }
3952 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3953 {
3954 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3955 paPhysExts[iPhysExt].apte[2] = iPte;
3956 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3957 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3958 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3959 }
3960 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3961
3962 /*
3963 * General treatment.
3964 */
3965 const uint16_t iPhysExtStart = iPhysExt;
3966 unsigned cMax = 15;
3967 for (;;)
3968 {
3969 Assert(iPhysExt < pPool->cMaxPhysExts);
3970 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3971 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3972 {
3973 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3974 paPhysExts[iPhysExt].apte[i] = iPte;
3975 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
3976 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3977 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3978 }
3979 if (!--cMax)
3980 {
3981 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
3982 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3983 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3984 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3985 }
3986
3987 /* advance */
3988 iPhysExt = paPhysExts[iPhysExt].iNext;
3989 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3990 break;
3991 }
3992
3993 /*
3994 * Add another extent to the list.
3995 */
3996 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3997 if (!pNew)
3998 {
3999 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4000 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4001 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4002 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4003 }
4004 pNew->iNext = iPhysExtStart;
4005 pNew->aidx[0] = iShwPT;
4006 pNew->apte[0] = iPte;
4007 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4008 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4009}
4010
4011
4012/**
4013 * Add a reference to guest physical page where extents are in use.
4014 *
4015 * @returns The new tracking data for PGMPAGE.
4016 *
4017 * @param pVM The VM handle.
4018 * @param pPhysPage Pointer to the aPages entry in the ram range.
4019 * @param u16 The ram range flags (top 16-bits).
4020 * @param iShwPT The shadow page table index.
4021 * @param iPte Page table entry
4022 */
4023uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4024{
4025 pgmLock(pVM);
4026 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4027 {
4028 /*
4029 * Convert to extent list.
4030 */
4031 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4032 uint16_t iPhysExt;
4033 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4034 if (pPhysExt)
4035 {
4036 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4037 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4038 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4039 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4040 pPhysExt->aidx[1] = iShwPT;
4041 pPhysExt->apte[1] = iPte;
4042 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4043 }
4044 else
4045 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4046 }
4047 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4048 {
4049 /*
4050 * Insert into the extent list.
4051 */
4052 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4053 }
4054 else
4055 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4056 pgmUnlock(pVM);
4057 return u16;
4058}
4059
4060
4061/**
4062 * Clear references to guest physical memory.
4063 *
4064 * @param pPool The pool.
4065 * @param pPage The page.
4066 * @param pPhysPage Pointer to the aPages entry in the ram range.
4067 * @param iPte Shadow PTE index
4068 */
4069void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4070{
4071 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4072 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4073
4074 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4075 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4076 {
4077 PVM pVM = pPool->CTX_SUFF(pVM);
4078 pgmLock(pVM);
4079
4080 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4081 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4082 do
4083 {
4084 Assert(iPhysExt < pPool->cMaxPhysExts);
4085
4086 /*
4087 * Look for the shadow page and check if it's all freed.
4088 */
4089 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4090 {
4091 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4092 && paPhysExts[iPhysExt].apte[i] == iPte)
4093 {
4094 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4095 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4096
4097 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4098 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4099 {
4100 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4101 pgmUnlock(pVM);
4102 return;
4103 }
4104
4105 /* we can free the node. */
4106 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4107 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4108 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4109 {
4110 /* lonely node */
4111 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4112 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4113 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
4114 }
4115 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4116 {
4117 /* head */
4118 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4119 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4120 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4121 }
4122 else
4123 {
4124 /* in list */
4125 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4126 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4127 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4128 }
4129 iPhysExt = iPhysExtNext;
4130 pgmUnlock(pVM);
4131 return;
4132 }
4133 }
4134
4135 /* next */
4136 iPhysExtPrev = iPhysExt;
4137 iPhysExt = paPhysExts[iPhysExt].iNext;
4138 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4139
4140 pgmUnlock(pVM);
4141 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4142 }
4143 else /* nothing to do */
4144 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4145}
4146
4147/**
4148 * Clear references to guest physical memory.
4149 *
4150 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
4151 * is assumed to be correct, so the linear search can be skipped and we can assert
4152 * at an earlier point.
4153 *
4154 * @param pPool The pool.
4155 * @param pPage The page.
4156 * @param HCPhys The host physical address corresponding to the guest page.
4157 * @param GCPhys The guest physical address corresponding to HCPhys.
4158 * @param iPte Shadow PTE index
4159 */
4160static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4161{
4162 /*
4163 * Walk range list.
4164 */
4165 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4166 while (pRam)
4167 {
4168 RTGCPHYS off = GCPhys - pRam->GCPhys;
4169 if (off < pRam->cb)
4170 {
4171 /* does it match? */
4172 const unsigned iPage = off >> PAGE_SHIFT;
4173 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4174#ifdef LOG_ENABLED
4175 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4176 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4177#endif
4178 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4179 {
4180 Assert(pPage->cPresent);
4181 Assert(pPool->cPresent);
4182 pPage->cPresent--;
4183 pPool->cPresent--;
4184 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4185 return;
4186 }
4187 break;
4188 }
4189 pRam = pRam->CTX_SUFF(pNext);
4190 }
4191 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4192}
4193
4194
4195/**
4196 * Clear references to guest physical memory.
4197 *
4198 * @param pPool The pool.
4199 * @param pPage The page.
4200 * @param HCPhys The host physical address corresponding to the guest page.
4201 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4202 * @param iPte Shadow pte index
4203 */
4204void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4205{
4206 RTHCPHYS HCPhysExpected = 0xDEADBEEFDEADBEEFULL;
4207
4208 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4209
4210 /*
4211 * Walk range list.
4212 */
4213 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4214 while (pRam)
4215 {
4216 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
4217 if (off < pRam->cb)
4218 {
4219 /* does it match? */
4220 const unsigned iPage = off >> PAGE_SHIFT;
4221 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
4222 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4223 {
4224 Assert(pPage->cPresent);
4225 Assert(pPool->cPresent);
4226 pPage->cPresent--;
4227 pPool->cPresent--;
4228 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4229 return;
4230 }
4231 HCPhysExpected = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
4232 break;
4233 }
4234 pRam = pRam->CTX_SUFF(pNext);
4235 }
4236
4237 /*
4238 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4239 */
4240 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4241 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
4242 while (pRam)
4243 {
4244 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4245 while (iPage-- > 0)
4246 {
4247 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4248 {
4249 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4250 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4251 Assert(pPage->cPresent);
4252 Assert(pPool->cPresent);
4253 pPage->cPresent--;
4254 pPool->cPresent--;
4255 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4256 return;
4257 }
4258 }
4259 pRam = pRam->CTX_SUFF(pNext);
4260 }
4261
4262 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Expected HCPhys with hint = %RHp)\n", HCPhys, GCPhysHint, HCPhysExpected));
4263}
4264
4265
4266/**
4267 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4268 *
4269 * @param pPool The pool.
4270 * @param pPage The page.
4271 * @param pShwPT The shadow page table (mapping of the page).
4272 * @param pGstPT The guest page table.
4273 */
4274DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4275{
4276 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4277 {
4278 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4279 if (pShwPT->a[i].n.u1Present)
4280 {
4281 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4282 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4283 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4284 if (!pPage->cPresent)
4285 break;
4286 }
4287 }
4288}
4289
4290
4291/**
4292 * Clear references to guest physical memory in a PAE / 32-bit page table.
4293 *
4294 * @param pPool The pool.
4295 * @param pPage The page.
4296 * @param pShwPT The shadow page table (mapping of the page).
4297 * @param pGstPT The guest page table (just a half one).
4298 */
4299DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4300{
4301 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4302 {
4303 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4304 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4305 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4306 {
4307 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4308 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4309 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK, i);
4310 if (!pPage->cPresent)
4311 break;
4312 }
4313 }
4314}
4315
4316
4317/**
4318 * Clear references to guest physical memory in a PAE / PAE page table.
4319 *
4320 * @param pPool The pool.
4321 * @param pPage The page.
4322 * @param pShwPT The shadow page table (mapping of the page).
4323 * @param pGstPT The guest page table.
4324 */
4325DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4326{
4327 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4328 {
4329 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4330 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4331 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4332 {
4333 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4334 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4335 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
4336 if (!pPage->cPresent)
4337 break;
4338 }
4339 }
4340}
4341
4342
4343/**
4344 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4345 *
4346 * @param pPool The pool.
4347 * @param pPage The page.
4348 * @param pShwPT The shadow page table (mapping of the page).
4349 */
4350DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4351{
4352 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4353 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4354 {
4355 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4356 if (pShwPT->a[i].n.u1Present)
4357 {
4358 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4359 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4360 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys, i);
4361 if (!pPage->cPresent)
4362 break;
4363 }
4364 }
4365}
4366
4367
4368/**
4369 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4370 *
4371 * @param pPool The pool.
4372 * @param pPage The page.
4373 * @param pShwPT The shadow page table (mapping of the page).
4374 */
4375DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4376{
4377 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4378 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4379 {
4380 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4381 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4382 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4383 {
4384 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4385 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4386 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys, i);
4387 if (!pPage->cPresent)
4388 break;
4389 }
4390 }
4391}
4392
4393
4394/**
4395 * Clear references to shadowed pages in an EPT page table.
4396 *
4397 * @param pPool The pool.
4398 * @param pPage The page.
4399 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4400 */
4401DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4402{
4403 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4404 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4405 {
4406 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4407 if (pShwPT->a[i].n.u1Present)
4408 {
4409 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4410 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4411 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys, i);
4412 if (!pPage->cPresent)
4413 break;
4414 }
4415 }
4416}
4417
4418
4419
4420/**
4421 * Clear references to shadowed pages in a 32 bits page directory.
4422 *
4423 * @param pPool The pool.
4424 * @param pPage The page.
4425 * @param pShwPD The shadow page directory (mapping of the page).
4426 */
4427DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4428{
4429 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4430 {
4431 Assert(!(pShwPD->a[i].u & RT_BIT_32(9)));
4432 if ( pShwPD->a[i].n.u1Present
4433 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4434 )
4435 {
4436 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4437 if (pSubPage)
4438 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4439 else
4440 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4441 }
4442 }
4443}
4444
4445/**
4446 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4447 *
4448 * @param pPool The pool.
4449 * @param pPage The page.
4450 * @param pShwPD The shadow page directory (mapping of the page).
4451 */
4452DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4453{
4454 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4455 {
4456 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4457 if ( pShwPD->a[i].n.u1Present
4458 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4459 {
4460#ifdef PGM_WITH_LARGE_PAGES
4461 if (pShwPD->a[i].b.u1Size)
4462 {
4463 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4464 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4465 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4466 }
4467 else
4468#endif
4469 {
4470 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4471 if (pSubPage)
4472 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4473 else
4474 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4475 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4476 }
4477 }
4478 }
4479}
4480
4481/**
4482 * Clear references to shadowed pages in a PAE page directory pointer table.
4483 *
4484 * @param pPool The pool.
4485 * @param pPage The page.
4486 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4487 */
4488DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4489{
4490 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4491 {
4492 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4493 if ( pShwPDPT->a[i].n.u1Present
4494 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4495 )
4496 {
4497 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4498 if (pSubPage)
4499 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4500 else
4501 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4502 }
4503 }
4504}
4505
4506
4507/**
4508 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4509 *
4510 * @param pPool The pool.
4511 * @param pPage The page.
4512 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4513 */
4514DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4515{
4516 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4517 {
4518 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4519 if (pShwPDPT->a[i].n.u1Present)
4520 {
4521 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4522 if (pSubPage)
4523 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4524 else
4525 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4526 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4527 }
4528 }
4529}
4530
4531
4532/**
4533 * Clear references to shadowed pages in a 64-bit level 4 page table.
4534 *
4535 * @param pPool The pool.
4536 * @param pPage The page.
4537 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4538 */
4539DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4540{
4541 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4542 {
4543 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4544 if (pShwPML4->a[i].n.u1Present)
4545 {
4546 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4547 if (pSubPage)
4548 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4549 else
4550 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4551 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4552 }
4553 }
4554}
4555
4556
4557/**
4558 * Clear references to shadowed pages in an EPT page directory.
4559 *
4560 * @param pPool The pool.
4561 * @param pPage The page.
4562 * @param pShwPD The shadow page directory (mapping of the page).
4563 */
4564DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4565{
4566 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4567 {
4568 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4569 if (pShwPD->a[i].n.u1Present)
4570 {
4571#ifdef PGM_WITH_LARGE_PAGES
4572 if (pShwPD->a[i].b.u1Size)
4573 {
4574 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4575 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4576 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys /* == base of 2 MB page */, i);
4577 }
4578 else
4579#endif
4580 {
4581 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4582 if (pSubPage)
4583 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4584 else
4585 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4586 }
4587 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4588 }
4589 }
4590}
4591
4592
4593/**
4594 * Clear references to shadowed pages in an EPT page directory pointer table.
4595 *
4596 * @param pPool The pool.
4597 * @param pPage The page.
4598 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4599 */
4600DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4601{
4602 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4603 {
4604 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4605 if (pShwPDPT->a[i].n.u1Present)
4606 {
4607 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4608 if (pSubPage)
4609 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4610 else
4611 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4612 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4613 }
4614 }
4615}
4616
4617
4618/**
4619 * Clears all references made by this page.
4620 *
4621 * This includes other shadow pages and GC physical addresses.
4622 *
4623 * @param pPool The pool.
4624 * @param pPage The page.
4625 */
4626static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4627{
4628 /*
4629 * Map the shadow page and take action according to the page kind.
4630 */
4631 PVM pVM = pPool->CTX_SUFF(pVM);
4632 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4633 switch (pPage->enmKind)
4634 {
4635 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4636 {
4637 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4638 void *pvGst;
4639 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4640 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4641 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4642 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4643 break;
4644 }
4645
4646 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4647 {
4648 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4649 void *pvGst;
4650 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4651 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4652 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4653 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4654 break;
4655 }
4656
4657 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4658 {
4659 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4660 void *pvGst;
4661 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4662 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4663 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4664 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4665 break;
4666 }
4667
4668 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4669 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4670 {
4671 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4672 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4673 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4674 break;
4675 }
4676
4677 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4678 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4679 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4680 {
4681 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4682 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4683 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4684 break;
4685 }
4686
4687 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4688 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4689 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4690 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4691 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4692 case PGMPOOLKIND_PAE_PD_PHYS:
4693 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4694 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4695 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4696 break;
4697
4698 case PGMPOOLKIND_32BIT_PD_PHYS:
4699 case PGMPOOLKIND_32BIT_PD:
4700 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4701 break;
4702
4703 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4704 case PGMPOOLKIND_PAE_PDPT:
4705 case PGMPOOLKIND_PAE_PDPT_PHYS:
4706 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4707 break;
4708
4709 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4710 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4711 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4712 break;
4713
4714 case PGMPOOLKIND_64BIT_PML4:
4715 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4716 break;
4717
4718 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4719 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4720 break;
4721
4722 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4723 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4724 break;
4725
4726 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4727 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4728 break;
4729
4730 default:
4731 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4732 }
4733
4734 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4735 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4736 ASMMemZeroPage(pvShw);
4737 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4738 pPage->fZeroed = true;
4739 Assert(!pPage->cPresent);
4740 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4741}
4742
4743/**
4744 * Flushes a pool page.
4745 *
4746 * This moves the page to the free list after removing all user references to it.
4747 *
4748 * @returns VBox status code.
4749 * @retval VINF_SUCCESS on success.
4750 * @param pPool The pool.
4751 * @param HCPhys The HC physical address of the shadow page.
4752 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4753 */
4754int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4755{
4756 PVM pVM = pPool->CTX_SUFF(pVM);
4757 bool fFlushRequired = false;
4758
4759 int rc = VINF_SUCCESS;
4760 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4761 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4762 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4763
4764 /*
4765 * Quietly reject any attempts at flushing any of the special root pages.
4766 */
4767 if (pPage->idx < PGMPOOL_IDX_FIRST)
4768 {
4769 AssertFailed(); /* can no longer happen */
4770 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4771 return VINF_SUCCESS;
4772 }
4773
4774 pgmLock(pVM);
4775
4776 /*
4777 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4778 */
4779 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4780 {
4781 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4782 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4783 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4784 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4785 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4786 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4787 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4788 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4789 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4790 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4791 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4792 pgmUnlock(pVM);
4793 return VINF_SUCCESS;
4794 }
4795
4796#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4797 /* Start a subset so we won't run out of mapping space. */
4798 PVMCPU pVCpu = VMMGetCpu(pVM);
4799 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4800#endif
4801
4802 /*
4803 * Mark the page as being in need of an ASMMemZeroPage().
4804 */
4805 pPage->fZeroed = false;
4806
4807#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4808 if (pPage->fDirty)
4809 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4810#endif
4811
4812 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4813 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4814 fFlushRequired = true;
4815
4816 /*
4817 * Clear the page.
4818 */
4819 pgmPoolTrackClearPageUsers(pPool, pPage);
4820 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4821 pgmPoolTrackDeref(pPool, pPage);
4822 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4823
4824 /*
4825 * Flush it from the cache.
4826 */
4827 pgmPoolCacheFlushPage(pPool, pPage);
4828
4829#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4830 /* Heavy stuff done. */
4831 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
4832#endif
4833
4834 /*
4835 * Deregistering the monitoring.
4836 */
4837 if (pPage->fMonitored)
4838 rc = pgmPoolMonitorFlush(pPool, pPage);
4839
4840 /*
4841 * Free the page.
4842 */
4843 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4844 pPage->iNext = pPool->iFreeHead;
4845 pPool->iFreeHead = pPage->idx;
4846 pPage->enmKind = PGMPOOLKIND_FREE;
4847 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4848 pPage->GCPhys = NIL_RTGCPHYS;
4849 pPage->fReusedFlushPending = false;
4850
4851 pPool->cUsedPages--;
4852
4853 /* Flush the TLBs of all VCPUs if required. */
4854 if ( fFlushRequired
4855 && fFlush)
4856 {
4857 PGM_INVL_ALL_VCPU_TLBS(pVM);
4858 }
4859
4860 pgmUnlock(pVM);
4861 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4862 return rc;
4863}
4864
4865
4866/**
4867 * Frees a usage of a pool page.
4868 *
4869 * The caller is responsible to updating the user table so that it no longer
4870 * references the shadow page.
4871 *
4872 * @param pPool The pool.
4873 * @param HCPhys The HC physical address of the shadow page.
4874 * @param iUser The shadow page pool index of the user table.
4875 * @param iUserTable The index into the user table (shadowed).
4876 */
4877void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4878{
4879 PVM pVM = pPool->CTX_SUFF(pVM);
4880
4881 STAM_PROFILE_START(&pPool->StatFree, a);
4882 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4883 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4884 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4885 pgmLock(pVM);
4886 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4887 if (!pPage->fCached)
4888 pgmPoolFlushPage(pPool, pPage);
4889 pgmUnlock(pVM);
4890 STAM_PROFILE_STOP(&pPool->StatFree, a);
4891}
4892
4893
4894/**
4895 * Makes one or more free page free.
4896 *
4897 * @returns VBox status code.
4898 * @retval VINF_SUCCESS on success.
4899 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4900 *
4901 * @param pPool The pool.
4902 * @param enmKind Page table kind
4903 * @param iUser The user of the page.
4904 */
4905static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4906{
4907 PVM pVM = pPool->CTX_SUFF(pVM);
4908
4909 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%d\n", iUser));
4910
4911 /*
4912 * If the pool isn't full grown yet, expand it.
4913 */
4914 if ( pPool->cCurPages < pPool->cMaxPages
4915#if defined(IN_RC)
4916 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4917 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4918 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4919#endif
4920 )
4921 {
4922 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4923#ifdef IN_RING3
4924 int rc = PGMR3PoolGrow(pVM);
4925#else
4926 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4927#endif
4928 if (RT_FAILURE(rc))
4929 return rc;
4930 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4931 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4932 return VINF_SUCCESS;
4933 }
4934
4935 /*
4936 * Free one cached page.
4937 */
4938 return pgmPoolCacheFreeOne(pPool, iUser);
4939}
4940
4941/**
4942 * Allocates a page from the pool.
4943 *
4944 * This page may actually be a cached page and not in need of any processing
4945 * on the callers part.
4946 *
4947 * @returns VBox status code.
4948 * @retval VINF_SUCCESS if a NEW page was allocated.
4949 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4950 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4951 * @param pVM The VM handle.
4952 * @param GCPhys The GC physical address of the page we're gonna shadow.
4953 * For 4MB and 2MB PD entries, it's the first address the
4954 * shadow PT is covering.
4955 * @param enmKind The kind of mapping.
4956 * @param enmAccess Access type for the mapping (only relevant for big pages)
4957 * @param iUser The shadow page pool index of the user table.
4958 * @param iUserTable The index into the user table (shadowed).
4959 * @param fLockPage Lock the page
4960 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4961 */
4962int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable,
4963 bool fLockPage, PPPGMPOOLPAGE ppPage)
4964{
4965 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4966 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4967 LogFlow(("pgmPoolAllocEx: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4968 *ppPage = NULL;
4969 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4970 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4971 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4972
4973 pgmLock(pVM);
4974
4975 if (pPool->fCacheEnabled)
4976 {
4977 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4978 if (RT_SUCCESS(rc2))
4979 {
4980 if (fLockPage)
4981 pgmPoolLockPage(pPool, *ppPage);
4982 pgmUnlock(pVM);
4983 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4984 LogFlow(("pgmPoolAllocEx: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4985 return rc2;
4986 }
4987 }
4988
4989 /*
4990 * Allocate a new one.
4991 */
4992 int rc = VINF_SUCCESS;
4993 uint16_t iNew = pPool->iFreeHead;
4994 if (iNew == NIL_PGMPOOL_IDX)
4995 {
4996 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4997 if (RT_FAILURE(rc))
4998 {
4999 pgmUnlock(pVM);
5000 Log(("pgmPoolAllocEx: returns %Rrc (Free)\n", rc));
5001 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5002 return rc;
5003 }
5004 iNew = pPool->iFreeHead;
5005 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
5006 }
5007
5008 /* unlink the free head */
5009 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5010 pPool->iFreeHead = pPage->iNext;
5011 pPage->iNext = NIL_PGMPOOL_IDX;
5012
5013 /*
5014 * Initialize it.
5015 */
5016 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5017 pPage->enmKind = enmKind;
5018 pPage->enmAccess = enmAccess;
5019 pPage->GCPhys = GCPhys;
5020 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5021 pPage->fMonitored = false;
5022 pPage->fCached = false;
5023#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5024 pPage->fDirty = false;
5025#endif
5026 pPage->fReusedFlushPending = false;
5027 pPage->cModifications = 0;
5028 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5029 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5030 pPage->cLocked = 0;
5031 pPage->cPresent = 0;
5032 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5033 pPage->pvLastAccessHandlerFault = 0;
5034 pPage->cLastAccessHandlerCount = 0;
5035 pPage->pvLastAccessHandlerRip = 0;
5036
5037 /*
5038 * Insert into the tracking and cache. If this fails, free the page.
5039 */
5040 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5041 if (RT_FAILURE(rc3))
5042 {
5043 pPool->cUsedPages--;
5044 pPage->enmKind = PGMPOOLKIND_FREE;
5045 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5046 pPage->GCPhys = NIL_RTGCPHYS;
5047 pPage->iNext = pPool->iFreeHead;
5048 pPool->iFreeHead = pPage->idx;
5049 pgmUnlock(pVM);
5050 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5051 Log(("pgmPoolAllocEx: returns %Rrc (Insert)\n", rc3));
5052 return rc3;
5053 }
5054
5055 /*
5056 * Commit the allocation, clear the page and return.
5057 */
5058#ifdef VBOX_WITH_STATISTICS
5059 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5060 pPool->cUsedPagesHigh = pPool->cUsedPages;
5061#endif
5062
5063 if (!pPage->fZeroed)
5064 {
5065 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5066 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5067 ASMMemZeroPage(pv);
5068 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5069 }
5070
5071 *ppPage = pPage;
5072 if (fLockPage)
5073 pgmPoolLockPage(pPool, pPage);
5074 pgmUnlock(pVM);
5075 LogFlow(("pgmPoolAllocEx: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5076 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5077 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5078 return rc;
5079}
5080
5081
5082/**
5083 * Frees a usage of a pool page.
5084 *
5085 * @param pVM The VM handle.
5086 * @param HCPhys The HC physical address of the shadow page.
5087 * @param iUser The shadow page pool index of the user table.
5088 * @param iUserTable The index into the user table (shadowed).
5089 */
5090void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5091{
5092 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5093 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5094 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5095}
5096
5097/**
5098 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5099 *
5100 * @returns Pointer to the shadow page structure.
5101 * @param pPool The pool.
5102 * @param HCPhys The HC physical address of the shadow page.
5103 */
5104PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5105{
5106 PVM pVM = pPool->CTX_SUFF(pVM);
5107
5108 Assert(PGMIsLockOwner(pVM));
5109
5110 /*
5111 * Look up the page.
5112 */
5113 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5114
5115 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5116 return pPage;
5117}
5118
5119
5120/**
5121 * Internal worker for finding a page for debugging purposes, no assertions.
5122 *
5123 * @returns Pointer to the shadow page structure. NULL on if not found.
5124 * @param pPool The pool.
5125 * @param HCPhys The HC physical address of the shadow page.
5126 */
5127PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5128{
5129 PVM pVM = pPool->CTX_SUFF(pVM);
5130 Assert(PGMIsLockOwner(pVM));
5131 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5132}
5133
5134
5135#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5136/**
5137 * Flush the specified page if present
5138 *
5139 * @param pVM The VM handle.
5140 * @param GCPhys Guest physical address of the page to flush
5141 */
5142void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5143{
5144 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5145
5146 VM_ASSERT_EMT(pVM);
5147
5148 /*
5149 * Look up the GCPhys in the hash.
5150 */
5151 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5152 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5153 if (i == NIL_PGMPOOL_IDX)
5154 return;
5155
5156 do
5157 {
5158 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5159 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5160 {
5161 switch (pPage->enmKind)
5162 {
5163 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5164 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5165 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5166 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5167 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5168 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5169 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5170 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5171 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5172 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5173 case PGMPOOLKIND_64BIT_PML4:
5174 case PGMPOOLKIND_32BIT_PD:
5175 case PGMPOOLKIND_PAE_PDPT:
5176 {
5177 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5178#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5179 if (pPage->fDirty)
5180 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5181 else
5182#endif
5183 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5184 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
5185 pgmPoolMonitorChainFlush(pPool, pPage);
5186 return;
5187 }
5188
5189 /* ignore, no monitoring. */
5190 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5191 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5193 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5194 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5195 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5196 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5197 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5198 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5199 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5200 case PGMPOOLKIND_ROOT_NESTED:
5201 case PGMPOOLKIND_PAE_PD_PHYS:
5202 case PGMPOOLKIND_PAE_PDPT_PHYS:
5203 case PGMPOOLKIND_32BIT_PD_PHYS:
5204 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5205 break;
5206
5207 default:
5208 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5209 }
5210 }
5211
5212 /* next */
5213 i = pPage->iNext;
5214 } while (i != NIL_PGMPOOL_IDX);
5215 return;
5216}
5217#endif /* IN_RING3 */
5218
5219#ifdef IN_RING3
5220
5221
5222/**
5223 * Reset CPU on hot plugging.
5224 *
5225 * @param pVM The VM handle.
5226 * @param pVCpu The virtual CPU.
5227 */
5228void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5229{
5230 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5231
5232 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5233 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5234 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5235}
5236
5237
5238/**
5239 * Flushes the entire cache.
5240 *
5241 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5242 * this and execute this CR3 flush.
5243 *
5244 * @param pPool The pool.
5245 */
5246void pgmR3PoolReset(PVM pVM)
5247{
5248 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5249
5250 Assert(PGMIsLockOwner(pVM));
5251 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5252 LogFlow(("pgmR3PoolReset:\n"));
5253
5254 /*
5255 * If there are no pages in the pool, there is nothing to do.
5256 */
5257 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5258 {
5259 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5260 return;
5261 }
5262
5263 /*
5264 * Exit the shadow mode since we're going to clear everything,
5265 * including the root page.
5266 */
5267 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5268 {
5269 PVMCPU pVCpu = &pVM->aCpus[i];
5270 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
5271 }
5272
5273 /*
5274 * Nuke the free list and reinsert all pages into it.
5275 */
5276 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5277 {
5278 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5279
5280 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5281 if (pPage->fMonitored)
5282 pgmPoolMonitorFlush(pPool, pPage);
5283 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5284 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5285 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5286 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5287 pPage->cModifications = 0;
5288 pPage->GCPhys = NIL_RTGCPHYS;
5289 pPage->enmKind = PGMPOOLKIND_FREE;
5290 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5291 Assert(pPage->idx == i);
5292 pPage->iNext = i + 1;
5293 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5294 pPage->fSeenNonGlobal = false;
5295 pPage->fMonitored = false;
5296#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5297 pPage->fDirty = false;
5298#endif
5299 pPage->fCached = false;
5300 pPage->fReusedFlushPending = false;
5301 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5302 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5303 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5304 pPage->cLocked = 0;
5305 }
5306 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5307 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5308 pPool->cUsedPages = 0;
5309
5310 /*
5311 * Zap and reinitialize the user records.
5312 */
5313 pPool->cPresent = 0;
5314 pPool->iUserFreeHead = 0;
5315 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5316 const unsigned cMaxUsers = pPool->cMaxUsers;
5317 for (unsigned i = 0; i < cMaxUsers; i++)
5318 {
5319 paUsers[i].iNext = i + 1;
5320 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5321 paUsers[i].iUserTable = 0xfffffffe;
5322 }
5323 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5324
5325 /*
5326 * Clear all the GCPhys links and rebuild the phys ext free list.
5327 */
5328 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
5329 pRam;
5330 pRam = pRam->CTX_SUFF(pNext))
5331 {
5332 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5333 while (iPage-- > 0)
5334 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
5335 }
5336
5337 pPool->iPhysExtFreeHead = 0;
5338 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5339 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5340 for (unsigned i = 0; i < cMaxPhysExts; i++)
5341 {
5342 paPhysExts[i].iNext = i + 1;
5343 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5344 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5345 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5346 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5347 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5348 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5349 }
5350 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5351
5352 /*
5353 * Just zap the modified list.
5354 */
5355 pPool->cModifiedPages = 0;
5356 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5357
5358 /*
5359 * Clear the GCPhys hash and the age list.
5360 */
5361 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5362 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5363 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5364 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5365
5366#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5367 /* Clear all dirty pages. */
5368 pPool->idxFreeDirtyPage = 0;
5369 pPool->cDirtyPages = 0;
5370 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5371 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5372#endif
5373
5374 /*
5375 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5376 */
5377 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5378 {
5379 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5380 pPage->iNext = NIL_PGMPOOL_IDX;
5381 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5382 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5383 pPage->cModifications = 0;
5384 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5385 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5386 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5387 if (pPage->fMonitored)
5388 {
5389 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK,
5390 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5391 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5392 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5393 pPool->pszAccessHandler);
5394 AssertFatalRCSuccess(rc);
5395 pgmPoolHashInsert(pPool, pPage);
5396 }
5397 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5398 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5399 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5400 }
5401
5402 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5403 {
5404 /*
5405 * Re-enter the shadowing mode and assert Sync CR3 FF.
5406 */
5407 PVMCPU pVCpu = &pVM->aCpus[i];
5408 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5409 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5410 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5411 }
5412
5413 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5414}
5415#endif /* IN_RING3 */
5416
5417#ifdef LOG_ENABLED
5418static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5419{
5420 switch(enmKind)
5421 {
5422 case PGMPOOLKIND_INVALID:
5423 return "PGMPOOLKIND_INVALID";
5424 case PGMPOOLKIND_FREE:
5425 return "PGMPOOLKIND_FREE";
5426 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5427 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5428 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5429 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5430 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5431 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5432 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5433 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5434 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5435 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5436 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5437 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5438 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5439 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5440 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5441 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5442 case PGMPOOLKIND_32BIT_PD:
5443 return "PGMPOOLKIND_32BIT_PD";
5444 case PGMPOOLKIND_32BIT_PD_PHYS:
5445 return "PGMPOOLKIND_32BIT_PD_PHYS";
5446 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5447 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5448 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5449 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5450 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5451 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5452 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5453 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5454 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5455 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5456 case PGMPOOLKIND_PAE_PD_PHYS:
5457 return "PGMPOOLKIND_PAE_PD_PHYS";
5458 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5459 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5460 case PGMPOOLKIND_PAE_PDPT:
5461 return "PGMPOOLKIND_PAE_PDPT";
5462 case PGMPOOLKIND_PAE_PDPT_PHYS:
5463 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5464 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5465 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5466 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5467 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5468 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5469 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5470 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5471 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5472 case PGMPOOLKIND_64BIT_PML4:
5473 return "PGMPOOLKIND_64BIT_PML4";
5474 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5475 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5476 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5477 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5478 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5479 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5480 case PGMPOOLKIND_ROOT_NESTED:
5481 return "PGMPOOLKIND_ROOT_NESTED";
5482 }
5483 return "Unknown kind!";
5484}
5485#endif /* LOG_ENABLED*/
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette