VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 68019

Last change on this file since 68019 was 65957, checked in by vboxsync, 8 years ago

PGMPool: Address problem where we would incorrectly try to flush an active CR3 page (or pages in PAE) because we thought it was being reused, which obviously would fail, then pretend we succeeded and restart the instruction. Happend on a STOS during linux bootup (e.g. ubuntu 15.10/amd64). Fix was to extend pgmRZPoolMonitorIsReused with a check on locked pages.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 216.5 KB
Line 
1/* $Id: PGMAllPool.cpp 65957 2017-03-06 20:41:43Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/mm.h>
25#include <VBox/vmm/em.h>
26#include <VBox/vmm/cpum.h>
27#ifdef IN_RC
28# include <VBox/vmm/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vmm/vm.h>
32#include "PGMInline.h"
33#include <VBox/disopcode.h>
34#include <VBox/vmm/hm_vmx.h>
35
36#include <VBox/log.h>
37#include <VBox/err.h>
38#include <iprt/asm.h>
39#include <iprt/asm-amd64-x86.h>
40#include <iprt/string.h>
41
42
43/*********************************************************************************************************************************
44* Internal Functions *
45*********************************************************************************************************************************/
46RT_C_DECLS_BEGIN
47#if 0 /* unused */
48DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
49DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
50#endif /* unused */
51static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
56static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
57#endif
58#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
59static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
60#endif
61
62int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
63PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
64void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
65void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
66
67RT_C_DECLS_END
68
69
70#if 0 /* unused */
71/**
72 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
73 *
74 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
75 * @param enmKind The page kind.
76 */
77DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
78{
79 switch (enmKind)
80 {
81 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
82 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
83 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
84 return true;
85 default:
86 return false;
87 }
88}
89#endif /* unused */
90
91
92/**
93 * Flushes a chain of pages sharing the same access monitor.
94 *
95 * @returns VBox status code suitable for scheduling.
96 * @param pPool The pool.
97 * @param pPage A page in the chain.
98 * @todo VBOXSTRICTRC
99 */
100int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
101{
102 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
103
104 /*
105 * Find the list head.
106 */
107 uint16_t idx = pPage->idx;
108 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
109 {
110 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
111 {
112 idx = pPage->iMonitoredPrev;
113 Assert(idx != pPage->idx);
114 pPage = &pPool->aPages[idx];
115 }
116 }
117
118 /*
119 * Iterate the list flushing each shadow page.
120 */
121 int rc = VINF_SUCCESS;
122 for (;;)
123 {
124 idx = pPage->iMonitoredNext;
125 Assert(idx != pPage->idx);
126 if (pPage->idx >= PGMPOOL_IDX_FIRST)
127 {
128 int rc2 = pgmPoolFlushPage(pPool, pPage);
129 AssertRC(rc2);
130 }
131 /* next */
132 if (idx == NIL_PGMPOOL_IDX)
133 break;
134 pPage = &pPool->aPages[idx];
135 }
136 return rc;
137}
138
139
140/**
141 * Wrapper for getting the current context pointer to the entry being modified.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pVM The cross context VM structure.
145 * @param pvDst Destination address
146 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
147 * on the context (e.g. \#PF in R0 & RC).
148 * @param GCPhysSrc The source guest physical address.
149 * @param cb Size of data to read
150 */
151DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
152{
153#if defined(IN_RING3)
154 NOREF(pVM); NOREF(GCPhysSrc);
155 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
156 return VINF_SUCCESS;
157#else
158 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
159 NOREF(pvSrc);
160 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
161#endif
162}
163
164
165/**
166 * Process shadow entries before they are changed by the guest.
167 *
168 * For PT entries we will clear them. For PD entries, we'll simply check
169 * for mapping conflicts and set the SyncCR3 FF if found.
170 *
171 * @param pVCpu The cross context virtual CPU structure.
172 * @param pPool The pool.
173 * @param pPage The head page.
174 * @param GCPhysFault The guest physical fault address.
175 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
176 * depending on the context (e.g. \#PF in R0 & RC).
177 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
178 */
179static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
180 void const *pvAddress, unsigned cbWrite)
181{
182 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
183 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
184 PVM pVM = pPool->CTX_SUFF(pVM);
185 NOREF(pVCpu);
186
187 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
188 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
189
190 for (;;)
191 {
192 union
193 {
194 void *pv;
195 PX86PT pPT;
196 PPGMSHWPTPAE pPTPae;
197 PX86PD pPD;
198 PX86PDPAE pPDPae;
199 PX86PDPT pPDPT;
200 PX86PML4 pPML4;
201 } uShw;
202
203 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
204 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
205
206 uShw.pv = NULL;
207 switch (pPage->enmKind)
208 {
209 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
210 {
211 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
212 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
213 const unsigned iShw = off / sizeof(X86PTE);
214 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
215 if (uShw.pPT->a[iShw].n.u1Present)
216 {
217 X86PTE GstPte;
218
219 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
220 AssertRC(rc);
221 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
222 pgmPoolTracDerefGCPhysHint(pPool, pPage,
223 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
224 GstPte.u & X86_PTE_PG_MASK,
225 iShw);
226 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
227 }
228 break;
229 }
230
231 /* page/2 sized */
232 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
233 {
234 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
235 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
236 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
237 {
238 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
239 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
240 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
241 {
242 X86PTE GstPte;
243 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
244 AssertRC(rc);
245
246 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
247 pgmPoolTracDerefGCPhysHint(pPool, pPage,
248 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
249 GstPte.u & X86_PTE_PG_MASK,
250 iShw);
251 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
252 }
253 }
254 break;
255 }
256
257 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
258 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
259 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
260 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
261 {
262 unsigned iGst = off / sizeof(X86PDE);
263 unsigned iShwPdpt = iGst / 256;
264 unsigned iShw = (iGst % 256) * 2;
265 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
266
267 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
268 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
269 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
270 {
271 for (unsigned i = 0; i < 2; i++)
272 {
273# ifdef VBOX_WITH_RAW_MODE_NOT_R0
274 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
275 {
276 Assert(pgmMapAreMappingsEnabled(pVM));
277 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
278 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
279 break;
280 }
281# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
282 if (uShw.pPDPae->a[iShw+i].n.u1Present)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
285 pgmPoolFree(pVM,
286 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
287 pPage->idx,
288 iShw + i);
289 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw+i].u, 0);
290 }
291
292 /* paranoia / a bit assumptive. */
293 if ( (off & 3)
294 && (off & 3) + cbWrite > 4)
295 {
296 const unsigned iShw2 = iShw + 2 + i;
297 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
298 {
299# ifdef VBOX_WITH_RAW_MODE_NOT_R0
300 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
301 {
302 Assert(pgmMapAreMappingsEnabled(pVM));
303 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
304 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
305 break;
306 }
307# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
308 if (uShw.pPDPae->a[iShw2].n.u1Present)
309 {
310 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
311 pgmPoolFree(pVM,
312 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
313 pPage->idx,
314 iShw2);
315 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
316 }
317 }
318 }
319 }
320 }
321 break;
322 }
323
324 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
325 {
326 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
327 const unsigned iShw = off / sizeof(X86PTEPAE);
328 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
329 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
330 {
331 X86PTEPAE GstPte;
332 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
333 AssertRC(rc);
334
335 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
336 pgmPoolTracDerefGCPhysHint(pPool, pPage,
337 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
338 GstPte.u & X86_PTE_PAE_PG_MASK,
339 iShw);
340 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
341 }
342
343 /* paranoia / a bit assumptive. */
344 if ( (off & 7)
345 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
346 {
347 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
348 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
349
350 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
351 {
352 X86PTEPAE GstPte;
353 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
354 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
355 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
356 AssertRC(rc);
357 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
358 pgmPoolTracDerefGCPhysHint(pPool, pPage,
359 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
360 GstPte.u & X86_PTE_PAE_PG_MASK,
361 iShw2);
362 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
363 }
364 }
365 break;
366 }
367
368 case PGMPOOLKIND_32BIT_PD:
369 {
370 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
371 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
372
373 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
374 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
375# ifdef VBOX_WITH_RAW_MODE_NOT_R0
376 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
377 {
378 Assert(pgmMapAreMappingsEnabled(pVM));
379 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
380 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
381 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
382 break;
383 }
384 else
385# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
386 {
387 if (uShw.pPD->a[iShw].n.u1Present)
388 {
389 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
390 pgmPoolFree(pVM,
391 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
392 pPage->idx,
393 iShw);
394 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
395 }
396 }
397 /* paranoia / a bit assumptive. */
398 if ( (off & 3)
399 && (off & 3) + cbWrite > sizeof(X86PTE))
400 {
401 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
402 if ( iShw2 != iShw
403 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
404 {
405# ifdef VBOX_WITH_RAW_MODE_NOT_R0
406 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
407 {
408 Assert(pgmMapAreMappingsEnabled(pVM));
409 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
410 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
411 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
412 break;
413 }
414# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
415 if (uShw.pPD->a[iShw2].n.u1Present)
416 {
417 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
418 pgmPoolFree(pVM,
419 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
420 pPage->idx,
421 iShw2);
422 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
423 }
424 }
425 }
426#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
427 if ( uShw.pPD->a[iShw].n.u1Present
428 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431# ifdef IN_RC /* TLB load - we're pushing things a bit... */
432 ASMProbeReadByte(pvAddress);
433# endif
434 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
435 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
436 }
437#endif
438 break;
439 }
440
441 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
442 {
443 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
444 const unsigned iShw = off / sizeof(X86PDEPAE);
445 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
446#ifdef VBOX_WITH_RAW_MODE_NOT_R0
447 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(pVM));
450 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
451 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
453 break;
454 }
455#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
456 /*
457 * Causes trouble when the guest uses a PDE to refer to the whole page table level
458 * structure. (Invalidate here; faults later on when it tries to change the page
459 * table entries -> recheck; probably only applies to the RC case.)
460 */
461#ifdef VBOX_WITH_RAW_MODE_NOT_R0
462 else
463#endif
464 {
465 if (uShw.pPDPae->a[iShw].n.u1Present)
466 {
467 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
468 pgmPoolFree(pVM,
469 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
470 pPage->idx,
471 iShw);
472 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
473 }
474 }
475 /* paranoia / a bit assumptive. */
476 if ( (off & 7)
477 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
478 {
479 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
480 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
481
482#ifdef VBOX_WITH_RAW_MODE_NOT_R0
483 if ( iShw2 != iShw
484 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
485 {
486 Assert(pgmMapAreMappingsEnabled(pVM));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
489 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
490 break;
491 }
492 else
493#endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
494 if (uShw.pPDPae->a[iShw2].n.u1Present)
495 {
496 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
497 pgmPoolFree(pVM,
498 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
499 pPage->idx,
500 iShw2);
501 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
502 }
503 }
504 break;
505 }
506
507 case PGMPOOLKIND_PAE_PDPT:
508 {
509 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
510 /*
511 * Hopefully this doesn't happen very often:
512 * - touching unused parts of the page
513 * - messing with the bits of pd pointers without changing the physical address
514 */
515 /* PDPT roots are not page aligned; 32 byte only! */
516 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
517
518 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
519 const unsigned iShw = offPdpt / sizeof(X86PDPE);
520 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
521 {
522# ifdef VBOX_WITH_RAW_MODE_NOT_R0
523 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
524 {
525 Assert(pgmMapAreMappingsEnabled(pVM));
526 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
527 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
528 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
529 break;
530 }
531 else
532# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
533 if (uShw.pPDPT->a[iShw].n.u1Present)
534 {
535 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
536 pgmPoolFree(pVM,
537 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
538 pPage->idx,
539 iShw);
540 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
541 }
542
543 /* paranoia / a bit assumptive. */
544 if ( (offPdpt & 7)
545 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
546 {
547 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
548 if ( iShw2 != iShw
549 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
550 {
551# ifdef VBOX_WITH_RAW_MODE_NOT_R0
552 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
553 {
554 Assert(pgmMapAreMappingsEnabled(pVM));
555 STAM_COUNTER_INC(&(pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZGuestCR3WriteConflict));
556 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 break;
559 }
560 else
561# endif /* VBOX_WITH_RAW_MODE_NOT_R0 */
562 if (uShw.pPDPT->a[iShw2].n.u1Present)
563 {
564 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
565 pgmPoolFree(pVM,
566 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
567 pPage->idx,
568 iShw2);
569 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
570 }
571 }
572 }
573 }
574 break;
575 }
576
577#ifndef IN_RC
578 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
579 {
580 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
581 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
582 const unsigned iShw = off / sizeof(X86PDEPAE);
583 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
584 if (uShw.pPDPae->a[iShw].n.u1Present)
585 {
586 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
587 pgmPoolFree(pVM,
588 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
589 pPage->idx,
590 iShw);
591 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
592 }
593 /* paranoia / a bit assumptive. */
594 if ( (off & 7)
595 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
596 {
597 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
598 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
599
600 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
601 if (uShw.pPDPae->a[iShw2].n.u1Present)
602 {
603 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
604 pgmPoolFree(pVM,
605 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
606 pPage->idx,
607 iShw2);
608 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
609 }
610 }
611 break;
612 }
613
614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
615 {
616 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
617 /*
618 * Hopefully this doesn't happen very often:
619 * - messing with the bits of pd pointers without changing the physical address
620 */
621 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
622 const unsigned iShw = off / sizeof(X86PDPE);
623 if (uShw.pPDPT->a[iShw].n.u1Present)
624 {
625 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
626 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
627 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
628 }
629 /* paranoia / a bit assumptive. */
630 if ( (off & 7)
631 && (off & 7) + cbWrite > sizeof(X86PDPE))
632 {
633 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
634 if (uShw.pPDPT->a[iShw2].n.u1Present)
635 {
636 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
637 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
638 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
639 }
640 }
641 break;
642 }
643
644 case PGMPOOLKIND_64BIT_PML4:
645 {
646 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
647 /*
648 * Hopefully this doesn't happen very often:
649 * - messing with the bits of pd pointers without changing the physical address
650 */
651 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
652 const unsigned iShw = off / sizeof(X86PDPE);
653 if (uShw.pPML4->a[iShw].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
656 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
657 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
658 }
659 /* paranoia / a bit assumptive. */
660 if ( (off & 7)
661 && (off & 7) + cbWrite > sizeof(X86PDPE))
662 {
663 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
664 if (uShw.pPML4->a[iShw2].n.u1Present)
665 {
666 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
667 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
668 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
669 }
670 }
671 break;
672 }
673#endif /* IN_RING0 */
674
675 default:
676 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
677 }
678 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
679
680 /* next */
681 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
682 return;
683 pPage = &pPool->aPages[pPage->iMonitoredNext];
684 }
685}
686
687#ifndef IN_RING3
688
689/**
690 * Checks if a access could be a fork operation in progress.
691 *
692 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
693 *
694 * @returns true if it's likely that we're forking, otherwise false.
695 * @param pPool The pool.
696 * @param pDis The disassembled instruction.
697 * @param offFault The access offset.
698 */
699DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
700{
701 /*
702 * i386 linux is using btr to clear X86_PTE_RW.
703 * The functions involved are (2.6.16 source inspection):
704 * clear_bit
705 * ptep_set_wrprotect
706 * copy_one_pte
707 * copy_pte_range
708 * copy_pmd_range
709 * copy_pud_range
710 * copy_page_range
711 * dup_mmap
712 * dup_mm
713 * copy_mm
714 * copy_process
715 * do_fork
716 */
717 if ( pDis->pCurInstr->uOpcode == OP_BTR
718 && !(offFault & 4)
719 /** @todo Validate that the bit index is X86_PTE_RW. */
720 )
721 {
722 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
723 return true;
724 }
725 return false;
726}
727
728
729/**
730 * Determine whether the page is likely to have been reused.
731 *
732 * @returns true if we consider the page as being reused for a different purpose.
733 * @returns false if we consider it to still be a paging page.
734 * @param pVM The cross context VM structure.
735 * @param pVCpu The cross context virtual CPU structure.
736 * @param pRegFrame Trap register frame.
737 * @param pDis The disassembly info for the faulting instruction.
738 * @param pvFault The fault address.
739 * @param pPage The pool page being accessed.
740 *
741 * @remark The REP prefix check is left to the caller because of STOSD/W.
742 */
743DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
744 PPGMPOOLPAGE pPage)
745{
746 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
747 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
748 if (pPage->cLocked)
749 {
750 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
751 return false;
752 }
753
754# ifndef IN_RC
755 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
756 if ( HMHasPendingIrq(pVM)
757 && (pRegFrame->rsp - pvFault) < 32)
758 {
759 /* Fault caused by stack writes while trying to inject an interrupt event. */
760 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
761 return true;
762 }
763# else
764 NOREF(pVM); NOREF(pvFault);
765# endif
766
767 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
768
769 /* Non-supervisor mode write means it's used for something else. */
770 if (CPUMGetGuestCPL(pVCpu) == 3)
771 return true;
772
773 switch (pDis->pCurInstr->uOpcode)
774 {
775 /* call implies the actual push of the return address faulted */
776 case OP_CALL:
777 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
778 return true;
779 case OP_PUSH:
780 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
781 return true;
782 case OP_PUSHF:
783 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
784 return true;
785 case OP_PUSHA:
786 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
787 return true;
788 case OP_FXSAVE:
789 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
790 return true;
791 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
792 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
793 return true;
794 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
795 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
796 return true;
797 case OP_MOVSWD:
798 case OP_STOSWD:
799 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
800 && pRegFrame->rcx >= 0x40
801 )
802 {
803 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
804
805 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
806 return true;
807 }
808 break;
809
810 default:
811 /*
812 * Anything having ESP on the left side means stack writes.
813 */
814 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
815 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
816 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
817 {
818 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
819 return true;
820 }
821 break;
822 }
823
824 /*
825 * Page table updates are very very unlikely to be crossing page boundraries,
826 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
827 */
828 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
829 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
830 {
831 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
832 return true;
833 }
834
835 /*
836 * Nobody does an unaligned 8 byte write to a page table, right.
837 */
838 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
839 {
840 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
841 return true;
842 }
843
844 return false;
845}
846
847
848/**
849 * Flushes the page being accessed.
850 *
851 * @returns VBox status code suitable for scheduling.
852 * @param pVM The cross context VM structure.
853 * @param pVCpu The cross context virtual CPU structure.
854 * @param pPool The pool.
855 * @param pPage The pool page (head).
856 * @param pDis The disassembly of the write instruction.
857 * @param pRegFrame The trap register frame.
858 * @param GCPhysFault The fault address as guest physical address.
859 * @param pvFault The fault address.
860 * @todo VBOXSTRICTRC
861 */
862static int pgmRZPoolAccessPfHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
863 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
864{
865 NOREF(pVM); NOREF(GCPhysFault);
866
867 /*
868 * First, do the flushing.
869 */
870 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
871
872 /*
873 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
874 * Must do this in raw mode (!); XP boot will fail otherwise.
875 */
876 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
877 if (rc2 == VINF_SUCCESS)
878 { /* do nothing */ }
879 else if (rc2 == VINF_EM_RESCHEDULE)
880 {
881 if (rc == VINF_SUCCESS)
882 rc = VBOXSTRICTRC_VAL(rc2);
883# ifndef IN_RING3
884 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
885# endif
886 }
887 else if (rc2 == VERR_EM_INTERPRETER)
888 {
889# ifdef IN_RC
890 if (PATMIsPatchGCAddr(pVM, pRegFrame->eip))
891 {
892 LogFlow(("pgmRZPoolAccessPfHandlerFlush: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
893 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->eip));
894 rc = VINF_SUCCESS;
895 STAM_COUNTER_INC(&pPool->StatMonitorPfRZIntrFailPatch2);
896 }
897 else
898# endif
899 {
900 rc = VINF_EM_RAW_EMULATE_INSTR;
901 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
902 }
903 }
904 else if (RT_FAILURE_NP(rc2))
905 rc = VBOXSTRICTRC_VAL(rc2);
906 else
907 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
908
909 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
910 return rc;
911}
912
913
914/**
915 * Handles the STOSD write accesses.
916 *
917 * @returns VBox status code suitable for scheduling.
918 * @param pVM The cross context VM structure.
919 * @param pPool The pool.
920 * @param pPage The pool page (head).
921 * @param pDis The disassembly of the write instruction.
922 * @param pRegFrame The trap register frame.
923 * @param GCPhysFault The fault address as guest physical address.
924 * @param pvFault The fault address.
925 */
926DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
927 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
928{
929 unsigned uIncrement = pDis->Param1.cb;
930 NOREF(pVM);
931
932 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
933 Assert(pRegFrame->rcx <= 0x20);
934
935# ifdef VBOX_STRICT
936 if (pDis->uOpMode == DISCPUMODE_32BIT)
937 Assert(uIncrement == 4);
938 else
939 Assert(uIncrement == 8);
940# endif
941
942 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
943
944 /*
945 * Increment the modification counter and insert it into the list
946 * of modified pages the first time.
947 */
948 if (!pPage->cModifications++)
949 pgmPoolMonitorModifiedInsert(pPool, pPage);
950
951 /*
952 * Execute REP STOSD.
953 *
954 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
955 * write situation, meaning that it's safe to write here.
956 */
957 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
958 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
959 while (pRegFrame->rcx)
960 {
961# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
962 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
963 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
964 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
965# else
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
967# endif
968# ifdef IN_RC
969 *(uint32_t *)(uintptr_t)pu32 = pRegFrame->eax;
970# else
971 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
972# endif
973 pu32 += uIncrement;
974 GCPhysFault += uIncrement;
975 pRegFrame->rdi += uIncrement;
976 pRegFrame->rcx--;
977 }
978 pRegFrame->rip += pDis->cbInstr;
979
980 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
981 return VINF_SUCCESS;
982}
983
984
985/**
986 * Handles the simple write accesses.
987 *
988 * @returns VBox status code suitable for scheduling.
989 * @param pVM The cross context VM structure.
990 * @param pVCpu The cross context virtual CPU structure.
991 * @param pPool The pool.
992 * @param pPage The pool page (head).
993 * @param pDis The disassembly of the write instruction.
994 * @param pRegFrame The trap register frame.
995 * @param GCPhysFault The fault address as guest physical address.
996 * @param pvFault The fault address.
997 * @param pfReused Reused state (in/out)
998 */
999DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1000 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1001{
1002 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1003 NOREF(pVM);
1004 NOREF(pfReused); /* initialized by caller */
1005
1006 /*
1007 * Increment the modification counter and insert it into the list
1008 * of modified pages the first time.
1009 */
1010 if (!pPage->cModifications++)
1011 pgmPoolMonitorModifiedInsert(pPool, pPage);
1012
1013 /*
1014 * Clear all the pages. ASSUMES that pvFault is readable.
1015 */
1016# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1017 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1018# endif
1019
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
1034
1035# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1036 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1037# endif
1038
1039 /*
1040 * Interpret the instruction.
1041 */
1042 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1043 if (RT_SUCCESS(rc))
1044 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1045 else if (rc == VERR_EM_INTERPRETER)
1046 {
1047 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1048 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1049 rc = VINF_EM_RAW_EMULATE_INSTR;
1050 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1051 }
1052
1053# if 0 /* experimental code */
1054 if (rc == VINF_SUCCESS)
1055 {
1056 switch (pPage->enmKind)
1057 {
1058 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1059 {
1060 X86PTEPAE GstPte;
1061 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1062 AssertRC(rc);
1063
1064 /* Check the new value written by the guest. If present and with a bogus physical address, then
1065 * it's fairly safe to assume the guest is reusing the PT.
1066 */
1067 if (GstPte.n.u1Present)
1068 {
1069 RTHCPHYS HCPhys = -1;
1070 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1071 if (rc != VINF_SUCCESS)
1072 {
1073 *pfReused = true;
1074 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1075 }
1076 }
1077 break;
1078 }
1079 }
1080 }
1081# endif
1082
1083 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1084 return VBOXSTRICTRC_VAL(rc);
1085}
1086
1087
1088/**
1089 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1090 * \#PF access handler callback for page table pages.}
1091 *
1092 * @remarks The @a pvUser argument points to the PGMPOOLPAGE.
1093 */
1094DECLEXPORT(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVM pVM, PVMCPU pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1095 RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1096{
1097 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1098 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1099 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1100 unsigned cMaxModifications;
1101 bool fForcedFlush = false;
1102 NOREF(uErrorCode);
1103
1104 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1105
1106 pgmLock(pVM);
1107 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1108 {
1109 /* Pool page changed while we were waiting for the lock; ignore. */
1110 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1111 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1112 pgmUnlock(pVM);
1113 return VINF_SUCCESS;
1114 }
1115# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1116 if (pPage->fDirty)
1117 {
1118 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1119 pgmUnlock(pVM);
1120 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1121 }
1122# endif
1123
1124# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1125 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1126 {
1127 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1128 void *pvGst;
1129 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1130 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1131 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1132 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1133 }
1134# endif
1135
1136 /*
1137 * Disassemble the faulting instruction.
1138 */
1139 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1140 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1141 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1142 {
1143 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1144 pgmUnlock(pVM);
1145 return rc;
1146 }
1147
1148 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1149
1150 /*
1151 * We should ALWAYS have the list head as user parameter. This
1152 * is because we use that page to record the changes.
1153 */
1154 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1155
1156# ifdef IN_RING0
1157 /* Maximum nr of modifications depends on the page type. */
1158 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1159 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1160 cMaxModifications = 4;
1161 else
1162 cMaxModifications = 24;
1163# else
1164 cMaxModifications = 48;
1165# endif
1166
1167 /*
1168 * Incremental page table updates should weigh more than random ones.
1169 * (Only applies when started from offset 0)
1170 */
1171 pVCpu->pgm.s.cPoolAccessHandler++;
1172 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1173 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1174 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1175 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1176 {
1177 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1178 Assert(pPage->cModifications < 32000);
1179 pPage->cModifications = pPage->cModifications * 2;
1180 pPage->GCPtrLastAccessHandlerFault = pvFault;
1181 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1182 if (pPage->cModifications >= cMaxModifications)
1183 {
1184 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1185 fForcedFlush = true;
1186 }
1187 }
1188
1189 if (pPage->cModifications >= cMaxModifications)
1190 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1191
1192 /*
1193 * Check if it's worth dealing with.
1194 */
1195 bool fReused = false;
1196 bool fNotReusedNotForking = false;
1197 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1198 || pgmPoolIsPageLocked(pPage)
1199 )
1200 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1201 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1202 {
1203 /*
1204 * Simple instructions, no REP prefix.
1205 */
1206 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1207 {
1208 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1209 if (fReused)
1210 goto flushPage;
1211
1212 /* A mov instruction to change the first page table entry will be remembered so we can detect
1213 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1214 */
1215 if ( rc == VINF_SUCCESS
1216 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1217 && pDis->pCurInstr->uOpcode == OP_MOV
1218 && (pvFault & PAGE_OFFSET_MASK) == 0)
1219 {
1220 pPage->GCPtrLastAccessHandlerFault = pvFault;
1221 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1222 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1223 /* Make sure we don't kick out a page too quickly. */
1224 if (pPage->cModifications > 8)
1225 pPage->cModifications = 2;
1226 }
1227 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1228 {
1229 /* ignore the 2nd write to this page table entry. */
1230 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1231 }
1232 else
1233 {
1234 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1235 pPage->GCPtrLastAccessHandlerRip = 0;
1236 }
1237
1238 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1239 pgmUnlock(pVM);
1240 return rc;
1241 }
1242
1243 /*
1244 * Windows is frequently doing small memset() operations (netio test 4k+).
1245 * We have to deal with these or we'll kill the cache and performance.
1246 */
1247 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1248 && !pRegFrame->eflags.Bits.u1DF
1249 && pDis->uOpMode == pDis->uCpuMode
1250 && pDis->uAddrMode == pDis->uCpuMode)
1251 {
1252 bool fValidStosd = false;
1253
1254 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1255 && pDis->fPrefix == DISPREFIX_REP
1256 && pRegFrame->ecx <= 0x20
1257 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1258 && !((uintptr_t)pvFault & 3)
1259 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1260 )
1261 {
1262 fValidStosd = true;
1263 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1264 }
1265 else
1266 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1267 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1268 && pRegFrame->rcx <= 0x20
1269 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1270 && !((uintptr_t)pvFault & 7)
1271 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1272 )
1273 {
1274 fValidStosd = true;
1275 }
1276
1277 if (fValidStosd)
1278 {
1279 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1280 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1281 pgmUnlock(pVM);
1282 return rc;
1283 }
1284 }
1285
1286 /* REP prefix, don't bother. */
1287 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1288 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1289 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1290 fNotReusedNotForking = true;
1291 }
1292
1293# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1294 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1295 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1296 */
1297 if ( pPage->cModifications >= cMaxModifications
1298 && !fForcedFlush
1299 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1300 && ( fNotReusedNotForking
1301 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1302 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1303 )
1304 )
1305 {
1306 Assert(!pgmPoolIsPageLocked(pPage));
1307 Assert(pPage->fDirty == false);
1308
1309 /* Flush any monitored duplicates as we will disable write protection. */
1310 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1311 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1312 {
1313 PPGMPOOLPAGE pPageHead = pPage;
1314
1315 /* Find the monitor head. */
1316 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1317 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1318
1319 while (pPageHead)
1320 {
1321 unsigned idxNext = pPageHead->iMonitoredNext;
1322
1323 if (pPageHead != pPage)
1324 {
1325 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1326 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1327 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1328 AssertRC(rc2);
1329 }
1330
1331 if (idxNext == NIL_PGMPOOL_IDX)
1332 break;
1333
1334 pPageHead = &pPool->aPages[idxNext];
1335 }
1336 }
1337
1338 /* The flushing above might fail for locked pages, so double check. */
1339 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1340 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1341 {
1342 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1343
1344 /* Temporarily allow write access to the page table again. */
1345 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK, pPage->GCPhys & PAGE_BASE_GC_MASK);
1346 if (rc == VINF_SUCCESS)
1347 {
1348 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1349 AssertMsg(rc == VINF_SUCCESS
1350 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1351 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1352 || rc == VERR_PAGE_NOT_PRESENT,
1353 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1354# ifdef VBOX_STRICT
1355 pPage->GCPtrDirtyFault = pvFault;
1356# endif
1357
1358 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1359 pgmUnlock(pVM);
1360 return rc;
1361 }
1362 }
1363 }
1364# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1365
1366 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1367flushPage:
1368 /*
1369 * Not worth it, so flush it.
1370 *
1371 * If we considered it to be reused, don't go back to ring-3
1372 * to emulate failed instructions since we usually cannot
1373 * interpret then. This may be a bit risky, in which case
1374 * the reuse detection must be fixed.
1375 */
1376 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1377 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1378 && fReused)
1379 {
1380 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1381 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1382 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1383 }
1384 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1385 pgmUnlock(pVM);
1386 return rc;
1387}
1388
1389#endif /* !IN_RING3 */
1390
1391/**
1392 * @callback_method_impl{FNPGMPHYSHANDLER,
1393 * Access handler for shadowed page table pages.}
1394 *
1395 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1396 */
1397PGM_ALL_CB2_DECL(VBOXSTRICTRC)
1398pgmPoolAccessHandler(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1399 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, void *pvUser)
1400{
1401 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1402 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1403 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1404 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1405 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1406
1407 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1408
1409 pgmLock(pVM);
1410
1411#ifdef VBOX_WITH_STATISTICS
1412 /*
1413 * Collect stats on the access.
1414 */
1415 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1416 if (cbBuf <= 16 && cbBuf > 0)
1417 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1418 else if (cbBuf >= 17 && cbBuf < 32)
1419 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1420 else if (cbBuf >= 32 && cbBuf < 64)
1421 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1422 else if (cbBuf >= 64)
1423 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1424
1425 uint8_t cbAlign;
1426 switch (pPage->enmKind)
1427 {
1428 default:
1429 cbAlign = 7;
1430 break;
1431 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1433 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1434 case PGMPOOLKIND_32BIT_PD:
1435 case PGMPOOLKIND_32BIT_PD_PHYS:
1436 cbAlign = 3;
1437 break;
1438 }
1439 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1440 if ((uint8_t)GCPhys & cbAlign)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1442#endif
1443
1444 /*
1445 * Make sure the pool page wasn't modified by a different CPU.
1446 */
1447 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1448 {
1449 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1450
1451 /* The max modification count before flushing depends on the context and page type. */
1452#ifdef IN_RING3
1453 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1454#else
1455 uint16_t cMaxModifications;
1456 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1457 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1458 cMaxModifications = 4;
1459 else
1460 cMaxModifications = 24;
1461# ifdef IN_RC
1462 cMaxModifications *= 2; /* traps are cheaper than exists. */
1463# endif
1464#endif
1465
1466 /*
1467 * We don't have to be very sophisticated about this since there are relativly few calls here.
1468 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1469 */
1470 if ( ( pPage->cModifications < cMaxModifications
1471 || pgmPoolIsPageLocked(pPage) )
1472 && enmOrigin != PGMACCESSORIGIN_DEVICE
1473 && cbBuf <= 16)
1474 {
1475 /* Clear the shadow entry. */
1476 if (!pPage->cModifications++)
1477 pgmPoolMonitorModifiedInsert(pPool, pPage);
1478
1479 if (cbBuf <= 8)
1480 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1481 else
1482 {
1483 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1484 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1485 }
1486 }
1487 else
1488 {
1489 /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
1490 pgmPoolMonitorChainFlush(pPool, pPage);
1491 }
1492
1493 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1494 }
1495 else
1496 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1497 pgmUnlock(pVM);
1498 return VINF_PGM_HANDLER_DO_DEFAULT;
1499}
1500
1501
1502# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1503
1504# if defined(VBOX_STRICT) && !defined(IN_RING3)
1505
1506/**
1507 * Check references to guest physical memory in a PAE / PAE page table.
1508 *
1509 * @param pPool The pool.
1510 * @param pPage The page.
1511 * @param pShwPT The shadow page table (mapping of the page).
1512 * @param pGstPT The guest page table.
1513 */
1514static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1515{
1516 unsigned cErrors = 0;
1517 int LastRc = -1; /* initialized to shut up gcc */
1518 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1519 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1520 PVM pVM = pPool->CTX_SUFF(pVM);
1521
1522#ifdef VBOX_STRICT
1523 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1524 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1525#endif
1526 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1527 {
1528 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1529 {
1530 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1531 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1532 if ( rc != VINF_SUCCESS
1533 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1534 {
1535 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1536 LastPTE = i;
1537 LastRc = rc;
1538 LastHCPhys = HCPhys;
1539 cErrors++;
1540
1541 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1542 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1543 AssertRC(rc);
1544
1545 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1546 {
1547 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1548
1549 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1550 {
1551 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1552
1553 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1554 {
1555 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1556 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1557 {
1558 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1559 }
1560 }
1561
1562 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1563 }
1564 }
1565 }
1566 }
1567 }
1568 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1569}
1570
1571
1572/**
1573 * Check references to guest physical memory in a PAE / 32-bit page table.
1574 *
1575 * @param pPool The pool.
1576 * @param pPage The page.
1577 * @param pShwPT The shadow page table (mapping of the page).
1578 * @param pGstPT The guest page table.
1579 */
1580static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1581{
1582 unsigned cErrors = 0;
1583 int LastRc = -1; /* initialized to shut up gcc */
1584 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1585 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1586 PVM pVM = pPool->CTX_SUFF(pVM);
1587
1588#ifdef VBOX_STRICT
1589 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1590 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1591#endif
1592 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1593 {
1594 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1595 {
1596 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1597 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1598 if ( rc != VINF_SUCCESS
1599 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1600 {
1601 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1602 LastPTE = i;
1603 LastRc = rc;
1604 LastHCPhys = HCPhys;
1605 cErrors++;
1606
1607 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1608 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1609 AssertRC(rc);
1610
1611 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1612 {
1613 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1614
1615 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1616 {
1617 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1618
1619 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1620 {
1621 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1622 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1623 {
1624 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1625 }
1626 }
1627
1628 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1629 }
1630 }
1631 }
1632 }
1633 }
1634 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1635}
1636
1637# endif /* VBOX_STRICT && !IN_RING3 */
1638
1639/**
1640 * Clear references to guest physical memory in a PAE / PAE page table.
1641 *
1642 * @returns nr of changed PTEs
1643 * @param pPool The pool.
1644 * @param pPage The page.
1645 * @param pShwPT The shadow page table (mapping of the page).
1646 * @param pGstPT The guest page table.
1647 * @param pOldGstPT The old cached guest page table.
1648 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1649 * @param pfFlush Flush reused page table (out)
1650 */
1651DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1652 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1653{
1654 unsigned cChanged = 0;
1655
1656#ifdef VBOX_STRICT
1657 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1658 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1659#endif
1660 *pfFlush = false;
1661
1662 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1663 {
1664 /* Check the new value written by the guest. If present and with a bogus physical address, then
1665 * it's fairly safe to assume the guest is reusing the PT.
1666 */
1667 if ( fAllowRemoval
1668 && pGstPT->a[i].n.u1Present)
1669 {
1670 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1671 {
1672 *pfFlush = true;
1673 return ++cChanged;
1674 }
1675 }
1676 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1677 {
1678 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1679 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1680 {
1681#ifdef VBOX_STRICT
1682 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1683 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1684 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1685#endif
1686 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1687 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1688 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1689 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1690
1691 if ( uHostAttr == uGuestAttr
1692 && fHostRW <= fGuestRW)
1693 continue;
1694 }
1695 cChanged++;
1696 /* Something was changed, so flush it. */
1697 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1698 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1699 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1700 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1701 }
1702 }
1703 return cChanged;
1704}
1705
1706
1707/**
1708 * Clear references to guest physical memory in a PAE / PAE page table.
1709 *
1710 * @returns nr of changed PTEs
1711 * @param pPool The pool.
1712 * @param pPage The page.
1713 * @param pShwPT The shadow page table (mapping of the page).
1714 * @param pGstPT The guest page table.
1715 * @param pOldGstPT The old cached guest page table.
1716 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1717 * @param pfFlush Flush reused page table (out)
1718 */
1719DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1720 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1721{
1722 unsigned cChanged = 0;
1723
1724#ifdef VBOX_STRICT
1725 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1726 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1727#endif
1728 *pfFlush = false;
1729
1730 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1731 {
1732 /* Check the new value written by the guest. If present and with a bogus physical address, then
1733 * it's fairly safe to assume the guest is reusing the PT.
1734 */
1735 if ( fAllowRemoval
1736 && pGstPT->a[i].n.u1Present)
1737 {
1738 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK))
1739 {
1740 *pfFlush = true;
1741 return ++cChanged;
1742 }
1743 }
1744 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1745 {
1746 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1747 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1748 {
1749#ifdef VBOX_STRICT
1750 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1751 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1752 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1753#endif
1754 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1755 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1756 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1757 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1758
1759 if ( uHostAttr == uGuestAttr
1760 && fHostRW <= fGuestRW)
1761 continue;
1762 }
1763 cChanged++;
1764 /* Something was changed, so flush it. */
1765 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1766 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1767 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1768 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1769 }
1770 }
1771 return cChanged;
1772}
1773
1774
1775/**
1776 * Flush a dirty page
1777 *
1778 * @param pVM The cross context VM structure.
1779 * @param pPool The pool.
1780 * @param idxSlot Dirty array slot index
1781 * @param fAllowRemoval Allow a reused page table to be removed
1782 */
1783static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1784{
1785 PPGMPOOLPAGE pPage;
1786 unsigned idxPage;
1787
1788 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1789 if (pPool->aDirtyPages[idxSlot].uIdx == NIL_PGMPOOL_IDX)
1790 return;
1791
1792 idxPage = pPool->aDirtyPages[idxSlot].uIdx;
1793 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1794 pPage = &pPool->aPages[idxPage];
1795 Assert(pPage->idx == idxPage);
1796 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1797
1798 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1799 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1800
1801#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1802 PVMCPU pVCpu = VMMGetCpu(pVM);
1803 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
1804#endif
1805
1806 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1807 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
1808 Assert(rc == VINF_SUCCESS);
1809 pPage->fDirty = false;
1810
1811#ifdef VBOX_STRICT
1812 uint64_t fFlags = 0;
1813 RTHCPHYS HCPhys;
1814 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1815 AssertMsg( ( rc == VINF_SUCCESS
1816 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1817 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1818 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1819 || rc == VERR_PAGE_NOT_PRESENT,
1820 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1821#endif
1822
1823 /* Flush those PTEs that have changed. */
1824 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1825 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1826 void *pvGst;
1827 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1828 bool fFlush;
1829 unsigned cChanges;
1830
1831 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1832 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1833 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1834 else
1835 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1836 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1837
1838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1839 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1840 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1841 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1842
1843 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1844 Assert(pPage->cModifications);
1845 if (cChanges < 4)
1846 pPage->cModifications = 1; /* must use > 0 here */
1847 else
1848 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1849
1850 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1851 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1852 pPool->idxFreeDirtyPage = idxSlot;
1853
1854 pPool->cDirtyPages--;
1855 pPool->aDirtyPages[idxSlot].uIdx = NIL_PGMPOOL_IDX;
1856 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1857 if (fFlush)
1858 {
1859 Assert(fAllowRemoval);
1860 Log(("Flush reused page table!\n"));
1861 pgmPoolFlushPage(pPool, pPage);
1862 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1863 }
1864 else
1865 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1866
1867#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
1868 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
1869#endif
1870}
1871
1872
1873# ifndef IN_RING3
1874/**
1875 * Add a new dirty page
1876 *
1877 * @param pVM The cross context VM structure.
1878 * @param pPool The pool.
1879 * @param pPage The page.
1880 */
1881void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1882{
1883 unsigned idxFree;
1884
1885 PGM_LOCK_ASSERT_OWNER(pVM);
1886 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1887 Assert(!pPage->fDirty);
1888
1889 idxFree = pPool->idxFreeDirtyPage;
1890 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1891 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1892
1893 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1894 {
1895 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1896 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1897 }
1898 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1899 AssertMsg(pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1900
1901 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1902
1903 /*
1904 * Make a copy of the guest page table as we require valid GCPhys addresses
1905 * when removing references to physical pages.
1906 * (The HCPhys linear lookup is *extremely* expensive!)
1907 */
1908 void *pvGst;
1909 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1910 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst, (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT) ? PAGE_SIZE : PAGE_SIZE/2);
1911# ifdef VBOX_STRICT
1912 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1913 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1914 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1915 else
1916 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1917 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1918# endif
1919 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1920
1921 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1922 pPage->fDirty = true;
1923 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1924 pPool->aDirtyPages[idxFree].uIdx = pPage->idx;
1925 pPool->cDirtyPages++;
1926
1927 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1928 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1929 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
1930 {
1931 unsigned i;
1932 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1933 {
1934 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1935 if (pPool->aDirtyPages[idxFree].uIdx == NIL_PGMPOOL_IDX)
1936 {
1937 pPool->idxFreeDirtyPage = idxFree;
1938 break;
1939 }
1940 }
1941 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1942 }
1943
1944 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX);
1945
1946 /*
1947 * Clear all references to this shadow table. See @bugref{7298}.
1948 */
1949 pgmPoolTrackClearPageUsers(pPool, pPage);
1950}
1951# endif /* !IN_RING3 */
1952
1953
1954/**
1955 * Check if the specified page is dirty (not write monitored)
1956 *
1957 * @return dirty or not
1958 * @param pVM The cross context VM structure.
1959 * @param GCPhys Guest physical address
1960 */
1961bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1962{
1963 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1964 PGM_LOCK_ASSERT_OWNER(pVM);
1965 if (!pPool->cDirtyPages)
1966 return false;
1967
1968 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1969
1970 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1971 {
1972 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
1973 {
1974 PPGMPOOLPAGE pPage;
1975 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
1976
1977 pPage = &pPool->aPages[idxPage];
1978 if (pPage->GCPhys == GCPhys)
1979 return true;
1980 }
1981 }
1982 return false;
1983}
1984
1985
1986/**
1987 * Reset all dirty pages by reinstating page monitoring.
1988 *
1989 * @param pVM The cross context VM structure.
1990 */
1991void pgmPoolResetDirtyPages(PVM pVM)
1992{
1993 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1994 PGM_LOCK_ASSERT_OWNER(pVM);
1995 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1996
1997 if (!pPool->cDirtyPages)
1998 return;
1999
2000 Log(("pgmPoolResetDirtyPages\n"));
2001 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2002 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2003
2004 pPool->idxFreeDirtyPage = 0;
2005 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2006 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2007 {
2008 unsigned i;
2009 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2010 {
2011 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2012 {
2013 pPool->idxFreeDirtyPage = i;
2014 break;
2015 }
2016 }
2017 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2018 }
2019
2020 Assert(pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2021 return;
2022}
2023
2024
2025/**
2026 * Invalidate the PT entry for the specified page
2027 *
2028 * @param pVM The cross context VM structure.
2029 * @param GCPtrPage Guest page to invalidate
2030 */
2031void pgmPoolResetDirtyPage(PVM pVM, RTGCPTR GCPtrPage)
2032{
2033 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2034 PGM_LOCK_ASSERT_OWNER(pVM);
2035 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2036
2037 if (!pPool->cDirtyPages)
2038 return;
2039
2040 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2041 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2042 {
2043 }
2044}
2045
2046
2047/**
2048 * Reset all dirty pages by reinstating page monitoring.
2049 *
2050 * @param pVM The cross context VM structure.
2051 * @param GCPhysPT Physical address of the page table
2052 */
2053void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
2054{
2055 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2056 PGM_LOCK_ASSERT_OWNER(pVM);
2057 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2058 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2059
2060 if (!pPool->cDirtyPages)
2061 return;
2062
2063 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2064
2065 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2066 {
2067 if (pPool->aDirtyPages[i].uIdx != NIL_PGMPOOL_IDX)
2068 {
2069 unsigned idxPage = pPool->aDirtyPages[i].uIdx;
2070
2071 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2072 if (pPage->GCPhys == GCPhysPT)
2073 {
2074 idxDirtyPage = i;
2075 break;
2076 }
2077 }
2078 }
2079
2080 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2081 {
2082 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2083 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2084 && pPool->aDirtyPages[pPool->idxFreeDirtyPage].uIdx != NIL_PGMPOOL_IDX)
2085 {
2086 unsigned i;
2087 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2088 {
2089 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
2090 {
2091 pPool->idxFreeDirtyPage = i;
2092 break;
2093 }
2094 }
2095 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2096 }
2097 }
2098}
2099
2100# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2101
2102/**
2103 * Inserts a page into the GCPhys hash table.
2104 *
2105 * @param pPool The pool.
2106 * @param pPage The page.
2107 */
2108DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2109{
2110 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2111 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2112 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2113 pPage->iNext = pPool->aiHash[iHash];
2114 pPool->aiHash[iHash] = pPage->idx;
2115}
2116
2117
2118/**
2119 * Removes a page from the GCPhys hash table.
2120 *
2121 * @param pPool The pool.
2122 * @param pPage The page.
2123 */
2124DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2125{
2126 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2127 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2128 if (pPool->aiHash[iHash] == pPage->idx)
2129 pPool->aiHash[iHash] = pPage->iNext;
2130 else
2131 {
2132 uint16_t iPrev = pPool->aiHash[iHash];
2133 for (;;)
2134 {
2135 const int16_t i = pPool->aPages[iPrev].iNext;
2136 if (i == pPage->idx)
2137 {
2138 pPool->aPages[iPrev].iNext = pPage->iNext;
2139 break;
2140 }
2141 if (i == NIL_PGMPOOL_IDX)
2142 {
2143 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2144 break;
2145 }
2146 iPrev = i;
2147 }
2148 }
2149 pPage->iNext = NIL_PGMPOOL_IDX;
2150}
2151
2152
2153/**
2154 * Frees up one cache page.
2155 *
2156 * @returns VBox status code.
2157 * @retval VINF_SUCCESS on success.
2158 * @param pPool The pool.
2159 * @param iUser The user index.
2160 */
2161static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2162{
2163#ifndef IN_RC
2164 const PVM pVM = pPool->CTX_SUFF(pVM);
2165#endif
2166 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2167 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2168
2169 /*
2170 * Select one page from the tail of the age list.
2171 */
2172 PPGMPOOLPAGE pPage;
2173 for (unsigned iLoop = 0; ; iLoop++)
2174 {
2175 uint16_t iToFree = pPool->iAgeTail;
2176 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2177 iToFree = pPool->aPages[iToFree].iAgePrev;
2178/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2179 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2180 {
2181 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2182 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2183 {
2184 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2185 continue;
2186 iToFree = i;
2187 break;
2188 }
2189 }
2190*/
2191 Assert(iToFree != iUser);
2192 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
2193 pPage = &pPool->aPages[iToFree];
2194
2195 /*
2196 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2197 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2198 */
2199 if ( !pgmPoolIsPageLocked(pPage)
2200 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2201 break;
2202 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2203 pgmPoolCacheUsed(pPool, pPage);
2204 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2205 }
2206
2207 /*
2208 * Found a usable page, flush it and return.
2209 */
2210 int rc = pgmPoolFlushPage(pPool, pPage);
2211 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2212 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2213 if (rc == VINF_SUCCESS)
2214 PGM_INVL_ALL_VCPU_TLBS(pVM);
2215 return rc;
2216}
2217
2218
2219/**
2220 * Checks if a kind mismatch is really a page being reused
2221 * or if it's just normal remappings.
2222 *
2223 * @returns true if reused and the cached page (enmKind1) should be flushed
2224 * @returns false if not reused.
2225 * @param enmKind1 The kind of the cached page.
2226 * @param enmKind2 The kind of the requested page.
2227 */
2228static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2229{
2230 switch (enmKind1)
2231 {
2232 /*
2233 * Never reuse them. There is no remapping in non-paging mode.
2234 */
2235 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2236 case PGMPOOLKIND_32BIT_PD_PHYS:
2237 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2238 case PGMPOOLKIND_PAE_PD_PHYS:
2239 case PGMPOOLKIND_PAE_PDPT_PHYS:
2240 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2241 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2242 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2243 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2244 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2245 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2246 return false;
2247
2248 /*
2249 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2250 */
2251 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2252 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2253 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2255 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2256 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2257 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2258 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2259 case PGMPOOLKIND_32BIT_PD:
2260 case PGMPOOLKIND_PAE_PDPT:
2261 switch (enmKind2)
2262 {
2263 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2264 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2265 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2266 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2267 case PGMPOOLKIND_64BIT_PML4:
2268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2269 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2270 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2271 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2272 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2273 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2276 return true;
2277 default:
2278 return false;
2279 }
2280
2281 /*
2282 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2283 */
2284 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2285 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2286 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2287 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2288 case PGMPOOLKIND_64BIT_PML4:
2289 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2290 switch (enmKind2)
2291 {
2292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2293 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2294 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2295 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2296 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2297 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2298 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2299 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2300 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2301 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2302 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2303 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2304 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2305 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2306 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2307 return true;
2308 default:
2309 return false;
2310 }
2311
2312 /*
2313 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2314 */
2315 case PGMPOOLKIND_ROOT_NESTED:
2316 return false;
2317
2318 default:
2319 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2320 }
2321}
2322
2323
2324/**
2325 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2326 *
2327 * @returns VBox status code.
2328 * @retval VINF_PGM_CACHED_PAGE on success.
2329 * @retval VERR_FILE_NOT_FOUND if not found.
2330 * @param pPool The pool.
2331 * @param GCPhys The GC physical address of the page we're gonna shadow.
2332 * @param enmKind The kind of mapping.
2333 * @param enmAccess Access type for the mapping (only relevant for big pages)
2334 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2335 * @param iUser The shadow page pool index of the user table. This is
2336 * NIL_PGMPOOL_IDX for root pages.
2337 * @param iUserTable The index into the user table (shadowed). Ignored if
2338 * root page
2339 * @param ppPage Where to store the pointer to the page.
2340 */
2341static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2342 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2343{
2344 /*
2345 * Look up the GCPhys in the hash.
2346 */
2347 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2348 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2349 if (i != NIL_PGMPOOL_IDX)
2350 {
2351 do
2352 {
2353 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2354 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2355 if (pPage->GCPhys == GCPhys)
2356 {
2357 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2358 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2359 && pPage->fA20Enabled == fA20Enabled)
2360 {
2361 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2362 * doesn't flush it in case there are no more free use records.
2363 */
2364 pgmPoolCacheUsed(pPool, pPage);
2365
2366 int rc = VINF_SUCCESS;
2367 if (iUser != NIL_PGMPOOL_IDX)
2368 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2369 if (RT_SUCCESS(rc))
2370 {
2371 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2372 *ppPage = pPage;
2373 if (pPage->cModifications)
2374 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2375 STAM_COUNTER_INC(&pPool->StatCacheHits);
2376 return VINF_PGM_CACHED_PAGE;
2377 }
2378 return rc;
2379 }
2380
2381 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2382 {
2383 /*
2384 * The kind is different. In some cases we should now flush the page
2385 * as it has been reused, but in most cases this is normal remapping
2386 * of PDs as PT or big pages using the GCPhys field in a slightly
2387 * different way than the other kinds.
2388 */
2389 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2390 {
2391 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2392 pgmPoolFlushPage(pPool, pPage);
2393 break;
2394 }
2395 }
2396 }
2397
2398 /* next */
2399 i = pPage->iNext;
2400 } while (i != NIL_PGMPOOL_IDX);
2401 }
2402
2403 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2404 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2405 return VERR_FILE_NOT_FOUND;
2406}
2407
2408
2409/**
2410 * Inserts a page into the cache.
2411 *
2412 * @param pPool The pool.
2413 * @param pPage The cached page.
2414 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2415 */
2416static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2417{
2418 /*
2419 * Insert into the GCPhys hash if the page is fit for that.
2420 */
2421 Assert(!pPage->fCached);
2422 if (fCanBeCached)
2423 {
2424 pPage->fCached = true;
2425 pgmPoolHashInsert(pPool, pPage);
2426 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2427 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2428 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2429 }
2430 else
2431 {
2432 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2433 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2434 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2435 }
2436
2437 /*
2438 * Insert at the head of the age list.
2439 */
2440 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2441 pPage->iAgeNext = pPool->iAgeHead;
2442 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2443 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2444 else
2445 pPool->iAgeTail = pPage->idx;
2446 pPool->iAgeHead = pPage->idx;
2447}
2448
2449
2450/**
2451 * Flushes a cached page.
2452 *
2453 * @param pPool The pool.
2454 * @param pPage The cached page.
2455 */
2456static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2457{
2458 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2459
2460 /*
2461 * Remove the page from the hash.
2462 */
2463 if (pPage->fCached)
2464 {
2465 pPage->fCached = false;
2466 pgmPoolHashRemove(pPool, pPage);
2467 }
2468 else
2469 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2470
2471 /*
2472 * Remove it from the age list.
2473 */
2474 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2475 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2476 else
2477 pPool->iAgeTail = pPage->iAgePrev;
2478 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2479 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2480 else
2481 pPool->iAgeHead = pPage->iAgeNext;
2482 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2483 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2484}
2485
2486
2487/**
2488 * Looks for pages sharing the monitor.
2489 *
2490 * @returns Pointer to the head page.
2491 * @returns NULL if not found.
2492 * @param pPool The Pool
2493 * @param pNewPage The page which is going to be monitored.
2494 */
2495static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2496{
2497 /*
2498 * Look up the GCPhys in the hash.
2499 */
2500 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2501 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2502 if (i == NIL_PGMPOOL_IDX)
2503 return NULL;
2504 do
2505 {
2506 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2507 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2508 && pPage != pNewPage)
2509 {
2510 switch (pPage->enmKind)
2511 {
2512 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2513 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2514 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2515 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2516 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2517 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2518 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2519 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2520 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2521 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2522 case PGMPOOLKIND_64BIT_PML4:
2523 case PGMPOOLKIND_32BIT_PD:
2524 case PGMPOOLKIND_PAE_PDPT:
2525 {
2526 /* find the head */
2527 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2528 {
2529 Assert(pPage->iMonitoredPrev != pPage->idx);
2530 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2531 }
2532 return pPage;
2533 }
2534
2535 /* ignore, no monitoring. */
2536 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2538 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2539 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2540 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2542 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2543 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2544 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2545 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2546 case PGMPOOLKIND_ROOT_NESTED:
2547 case PGMPOOLKIND_PAE_PD_PHYS:
2548 case PGMPOOLKIND_PAE_PDPT_PHYS:
2549 case PGMPOOLKIND_32BIT_PD_PHYS:
2550 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2551 break;
2552 default:
2553 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2554 }
2555 }
2556
2557 /* next */
2558 i = pPage->iNext;
2559 } while (i != NIL_PGMPOOL_IDX);
2560 return NULL;
2561}
2562
2563
2564/**
2565 * Enabled write monitoring of a guest page.
2566 *
2567 * @returns VBox status code.
2568 * @retval VINF_SUCCESS on success.
2569 * @param pPool The pool.
2570 * @param pPage The cached page.
2571 */
2572static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2573{
2574 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2575
2576 /*
2577 * Filter out the relevant kinds.
2578 */
2579 switch (pPage->enmKind)
2580 {
2581 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2582 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2583 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2584 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2585 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2586 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2587 case PGMPOOLKIND_64BIT_PML4:
2588 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2589 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2590 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2591 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2592 case PGMPOOLKIND_32BIT_PD:
2593 case PGMPOOLKIND_PAE_PDPT:
2594 break;
2595
2596 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2597 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2598 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2599 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2600 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2601 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2602 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2603 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2604 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2605 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2606 case PGMPOOLKIND_ROOT_NESTED:
2607 /* Nothing to monitor here. */
2608 return VINF_SUCCESS;
2609
2610 case PGMPOOLKIND_32BIT_PD_PHYS:
2611 case PGMPOOLKIND_PAE_PDPT_PHYS:
2612 case PGMPOOLKIND_PAE_PD_PHYS:
2613 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2614 /* Nothing to monitor here. */
2615 return VINF_SUCCESS;
2616 default:
2617 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2618 }
2619
2620 /*
2621 * Install handler.
2622 */
2623 int rc;
2624 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2625 if (pPageHead)
2626 {
2627 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2628 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2629
2630#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2631 if (pPageHead->fDirty)
2632 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2633#endif
2634
2635 pPage->iMonitoredPrev = pPageHead->idx;
2636 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2637 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2638 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2639 pPageHead->iMonitoredNext = pPage->idx;
2640 rc = VINF_SUCCESS;
2641 }
2642 else
2643 {
2644 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2645 PVM pVM = pPool->CTX_SUFF(pVM);
2646 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2647 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2648 MMHyperCCToR3(pVM, pPage), MMHyperCCToR0(pVM, pPage), MMHyperCCToRC(pVM, pPage),
2649 NIL_RTR3PTR /*pszDesc*/);
2650 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2651 * the heap size should suffice. */
2652 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2653 PVMCPU pVCpu = VMMGetCpu(pVM);
2654 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2655 }
2656 pPage->fMonitored = true;
2657 return rc;
2658}
2659
2660
2661/**
2662 * Disables write monitoring of a guest page.
2663 *
2664 * @returns VBox status code.
2665 * @retval VINF_SUCCESS on success.
2666 * @param pPool The pool.
2667 * @param pPage The cached page.
2668 */
2669static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2670{
2671 /*
2672 * Filter out the relevant kinds.
2673 */
2674 switch (pPage->enmKind)
2675 {
2676 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2677 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2678 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2679 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2680 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2681 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2682 case PGMPOOLKIND_64BIT_PML4:
2683 case PGMPOOLKIND_32BIT_PD:
2684 case PGMPOOLKIND_PAE_PDPT:
2685 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2686 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2687 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2688 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2689 break;
2690
2691 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2692 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2693 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2694 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2695 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2696 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2697 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2698 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2699 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2700 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2701 case PGMPOOLKIND_ROOT_NESTED:
2702 case PGMPOOLKIND_PAE_PD_PHYS:
2703 case PGMPOOLKIND_PAE_PDPT_PHYS:
2704 case PGMPOOLKIND_32BIT_PD_PHYS:
2705 /* Nothing to monitor here. */
2706 Assert(!pPage->fMonitored);
2707 return VINF_SUCCESS;
2708
2709 default:
2710 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2711 }
2712 Assert(pPage->fMonitored);
2713
2714 /*
2715 * Remove the page from the monitored list or uninstall it if last.
2716 */
2717 const PVM pVM = pPool->CTX_SUFF(pVM);
2718 int rc;
2719 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2720 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2721 {
2722 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2723 {
2724 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2725 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2726 rc = PGMHandlerPhysicalChangeUserArgs(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, MMHyperCCToR3(pVM, pNewHead),
2727 MMHyperCCToR0(pVM, pNewHead), MMHyperCCToRC(pVM, pNewHead));
2728
2729 AssertFatalRCSuccess(rc);
2730 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2731 }
2732 else
2733 {
2734 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2735 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2736 {
2737 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2738 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2739 }
2740 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2741 rc = VINF_SUCCESS;
2742 }
2743 }
2744 else
2745 {
2746 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2747 AssertFatalRC(rc);
2748 PVMCPU pVCpu = VMMGetCpu(pVM);
2749 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2750 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2751 }
2752 pPage->fMonitored = false;
2753
2754 /*
2755 * Remove it from the list of modified pages (if in it).
2756 */
2757 pgmPoolMonitorModifiedRemove(pPool, pPage);
2758
2759 return rc;
2760}
2761
2762
2763/**
2764 * Inserts the page into the list of modified pages.
2765 *
2766 * @param pPool The pool.
2767 * @param pPage The page.
2768 */
2769void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2770{
2771 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2772 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2773 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2774 && pPool->iModifiedHead != pPage->idx,
2775 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2776 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2777 pPool->iModifiedHead, pPool->cModifiedPages));
2778
2779 pPage->iModifiedNext = pPool->iModifiedHead;
2780 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2781 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2782 pPool->iModifiedHead = pPage->idx;
2783 pPool->cModifiedPages++;
2784#ifdef VBOX_WITH_STATISTICS
2785 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2786 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2787#endif
2788}
2789
2790
2791/**
2792 * Removes the page from the list of modified pages and resets the
2793 * modification counter.
2794 *
2795 * @param pPool The pool.
2796 * @param pPage The page which is believed to be in the list of modified pages.
2797 */
2798static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2799{
2800 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2801 if (pPool->iModifiedHead == pPage->idx)
2802 {
2803 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2804 pPool->iModifiedHead = pPage->iModifiedNext;
2805 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2806 {
2807 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2808 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2809 }
2810 pPool->cModifiedPages--;
2811 }
2812 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2813 {
2814 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2815 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2816 {
2817 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2818 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2819 }
2820 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2821 pPool->cModifiedPages--;
2822 }
2823 else
2824 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2825 pPage->cModifications = 0;
2826}
2827
2828
2829/**
2830 * Zaps the list of modified pages, resetting their modification counters in the process.
2831 *
2832 * @param pVM The cross context VM structure.
2833 */
2834static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2835{
2836 pgmLock(pVM);
2837 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2838 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2839
2840 unsigned cPages = 0; NOREF(cPages);
2841
2842#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2843 pgmPoolResetDirtyPages(pVM);
2844#endif
2845
2846 uint16_t idx = pPool->iModifiedHead;
2847 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2848 while (idx != NIL_PGMPOOL_IDX)
2849 {
2850 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2851 idx = pPage->iModifiedNext;
2852 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2853 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2854 pPage->cModifications = 0;
2855 Assert(++cPages);
2856 }
2857 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2858 pPool->cModifiedPages = 0;
2859 pgmUnlock(pVM);
2860}
2861
2862
2863/**
2864 * Handle SyncCR3 pool tasks
2865 *
2866 * @returns VBox status code.
2867 * @retval VINF_SUCCESS if successfully added.
2868 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2869 * @param pVCpu The cross context virtual CPU structure.
2870 * @remark Should only be used when monitoring is available, thus placed in
2871 * the PGMPOOL_WITH_MONITORING \#ifdef.
2872 */
2873int pgmPoolSyncCR3(PVMCPU pVCpu)
2874{
2875 PVM pVM = pVCpu->CTX_SUFF(pVM);
2876 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2877
2878 /*
2879 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2880 * Occasionally we will have to clear all the shadow page tables because we wanted
2881 * to monitor a page which was mapped by too many shadowed page tables. This operation
2882 * sometimes referred to as a 'lightweight flush'.
2883 */
2884# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2885 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2886 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2887# else /* !IN_RING3 */
2888 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2889 {
2890 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2891 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2892
2893 /* Make sure all other VCPUs return to ring 3. */
2894 if (pVM->cCpus > 1)
2895 {
2896 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2897 PGM_INVL_ALL_VCPU_TLBS(pVM);
2898 }
2899 return VINF_PGM_SYNC_CR3;
2900 }
2901# endif /* !IN_RING3 */
2902 else
2903 {
2904 pgmPoolMonitorModifiedClearAll(pVM);
2905
2906 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2907 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2908 {
2909 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2910 return pgmPoolSyncCR3(pVCpu);
2911 }
2912 }
2913 return VINF_SUCCESS;
2914}
2915
2916
2917/**
2918 * Frees up at least one user entry.
2919 *
2920 * @returns VBox status code.
2921 * @retval VINF_SUCCESS if successfully added.
2922 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2923 * @param pPool The pool.
2924 * @param iUser The user index.
2925 */
2926static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2927{
2928 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2929 /*
2930 * Just free cached pages in a braindead fashion.
2931 */
2932 /** @todo walk the age list backwards and free the first with usage. */
2933 int rc = VINF_SUCCESS;
2934 do
2935 {
2936 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2937 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2938 rc = rc2;
2939 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2940 return rc;
2941}
2942
2943
2944/**
2945 * Inserts a page into the cache.
2946 *
2947 * This will create user node for the page, insert it into the GCPhys
2948 * hash, and insert it into the age list.
2949 *
2950 * @returns VBox status code.
2951 * @retval VINF_SUCCESS if successfully added.
2952 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2953 * @param pPool The pool.
2954 * @param pPage The cached page.
2955 * @param GCPhys The GC physical address of the page we're gonna shadow.
2956 * @param iUser The user index.
2957 * @param iUserTable The user table index.
2958 */
2959DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2960{
2961 int rc = VINF_SUCCESS;
2962 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2963
2964 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2965
2966 if (iUser != NIL_PGMPOOL_IDX)
2967 {
2968#ifdef VBOX_STRICT
2969 /*
2970 * Check that the entry doesn't already exists.
2971 */
2972 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2973 {
2974 uint16_t i = pPage->iUserHead;
2975 do
2976 {
2977 Assert(i < pPool->cMaxUsers);
2978 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2979 i = paUsers[i].iNext;
2980 } while (i != NIL_PGMPOOL_USER_INDEX);
2981 }
2982#endif
2983
2984 /*
2985 * Find free a user node.
2986 */
2987 uint16_t i = pPool->iUserFreeHead;
2988 if (i == NIL_PGMPOOL_USER_INDEX)
2989 {
2990 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2991 if (RT_FAILURE(rc))
2992 return rc;
2993 i = pPool->iUserFreeHead;
2994 }
2995
2996 /*
2997 * Unlink the user node from the free list,
2998 * initialize and insert it into the user list.
2999 */
3000 pPool->iUserFreeHead = paUsers[i].iNext;
3001 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3002 paUsers[i].iUser = iUser;
3003 paUsers[i].iUserTable = iUserTable;
3004 pPage->iUserHead = i;
3005 }
3006 else
3007 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3008
3009
3010 /*
3011 * Insert into cache and enable monitoring of the guest page if enabled.
3012 *
3013 * Until we implement caching of all levels, including the CR3 one, we'll
3014 * have to make sure we don't try monitor & cache any recursive reuse of
3015 * a monitored CR3 page. Because all windows versions are doing this we'll
3016 * have to be able to do combined access monitoring, CR3 + PT and
3017 * PD + PT (guest PAE).
3018 *
3019 * Update:
3020 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3021 */
3022 const bool fCanBeMonitored = true;
3023 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3024 if (fCanBeMonitored)
3025 {
3026 rc = pgmPoolMonitorInsert(pPool, pPage);
3027 AssertRC(rc);
3028 }
3029 return rc;
3030}
3031
3032
3033/**
3034 * Adds a user reference to a page.
3035 *
3036 * This will move the page to the head of the
3037 *
3038 * @returns VBox status code.
3039 * @retval VINF_SUCCESS if successfully added.
3040 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3041 * @param pPool The pool.
3042 * @param pPage The cached page.
3043 * @param iUser The user index.
3044 * @param iUserTable The user table.
3045 */
3046static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3047{
3048 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3049 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3050 Assert(iUser != NIL_PGMPOOL_IDX);
3051
3052# ifdef VBOX_STRICT
3053 /*
3054 * Check that the entry doesn't already exists. We only allow multiple
3055 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3056 */
3057 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3058 {
3059 uint16_t i = pPage->iUserHead;
3060 do
3061 {
3062 Assert(i < pPool->cMaxUsers);
3063 /** @todo this assertion looks odd... Shouldn't it be && here? */
3064 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3065 i = paUsers[i].iNext;
3066 } while (i != NIL_PGMPOOL_USER_INDEX);
3067 }
3068# endif
3069
3070 /*
3071 * Allocate a user node.
3072 */
3073 uint16_t i = pPool->iUserFreeHead;
3074 if (i == NIL_PGMPOOL_USER_INDEX)
3075 {
3076 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3077 if (RT_FAILURE(rc))
3078 return rc;
3079 i = pPool->iUserFreeHead;
3080 }
3081 pPool->iUserFreeHead = paUsers[i].iNext;
3082
3083 /*
3084 * Initialize the user node and insert it.
3085 */
3086 paUsers[i].iNext = pPage->iUserHead;
3087 paUsers[i].iUser = iUser;
3088 paUsers[i].iUserTable = iUserTable;
3089 pPage->iUserHead = i;
3090
3091# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3092 if (pPage->fDirty)
3093 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3094# endif
3095
3096 /*
3097 * Tell the cache to update its replacement stats for this page.
3098 */
3099 pgmPoolCacheUsed(pPool, pPage);
3100 return VINF_SUCCESS;
3101}
3102
3103
3104/**
3105 * Frees a user record associated with a page.
3106 *
3107 * This does not clear the entry in the user table, it simply replaces the
3108 * user record to the chain of free records.
3109 *
3110 * @param pPool The pool.
3111 * @param pPage The shadow page.
3112 * @param iUser The shadow page pool index of the user table.
3113 * @param iUserTable The index into the user table (shadowed).
3114 *
3115 * @remarks Don't call this for root pages.
3116 */
3117static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3118{
3119 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3120 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3121 Assert(iUser != NIL_PGMPOOL_IDX);
3122
3123 /*
3124 * Unlink and free the specified user entry.
3125 */
3126
3127 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3128 uint16_t i = pPage->iUserHead;
3129 if ( i != NIL_PGMPOOL_USER_INDEX
3130 && paUsers[i].iUser == iUser
3131 && paUsers[i].iUserTable == iUserTable)
3132 {
3133 pPage->iUserHead = paUsers[i].iNext;
3134
3135 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3136 paUsers[i].iNext = pPool->iUserFreeHead;
3137 pPool->iUserFreeHead = i;
3138 return;
3139 }
3140
3141 /* General: Linear search. */
3142 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3143 while (i != NIL_PGMPOOL_USER_INDEX)
3144 {
3145 if ( paUsers[i].iUser == iUser
3146 && paUsers[i].iUserTable == iUserTable)
3147 {
3148 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3149 paUsers[iPrev].iNext = paUsers[i].iNext;
3150 else
3151 pPage->iUserHead = paUsers[i].iNext;
3152
3153 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3154 paUsers[i].iNext = pPool->iUserFreeHead;
3155 pPool->iUserFreeHead = i;
3156 return;
3157 }
3158 iPrev = i;
3159 i = paUsers[i].iNext;
3160 }
3161
3162 /* Fatal: didn't find it */
3163 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3164 iUser, iUserTable, pPage->GCPhys));
3165}
3166
3167
3168#if 0 /* unused */
3169/**
3170 * Gets the entry size of a shadow table.
3171 *
3172 * @param enmKind The kind of page.
3173 *
3174 * @returns The size of the entry in bytes. That is, 4 or 8.
3175 * @returns If the kind is not for a table, an assertion is raised and 0 is
3176 * returned.
3177 */
3178DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3179{
3180 switch (enmKind)
3181 {
3182 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3183 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3184 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3185 case PGMPOOLKIND_32BIT_PD:
3186 case PGMPOOLKIND_32BIT_PD_PHYS:
3187 return 4;
3188
3189 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3190 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3191 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3192 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3193 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3194 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3195 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3196 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3197 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3198 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3199 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3200 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3201 case PGMPOOLKIND_64BIT_PML4:
3202 case PGMPOOLKIND_PAE_PDPT:
3203 case PGMPOOLKIND_ROOT_NESTED:
3204 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3205 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3206 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3207 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3208 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3209 case PGMPOOLKIND_PAE_PD_PHYS:
3210 case PGMPOOLKIND_PAE_PDPT_PHYS:
3211 return 8;
3212
3213 default:
3214 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3215 }
3216}
3217#endif /* unused */
3218
3219#if 0 /* unused */
3220/**
3221 * Gets the entry size of a guest table.
3222 *
3223 * @param enmKind The kind of page.
3224 *
3225 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3226 * @returns If the kind is not for a table, an assertion is raised and 0 is
3227 * returned.
3228 */
3229DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3230{
3231 switch (enmKind)
3232 {
3233 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3234 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3235 case PGMPOOLKIND_32BIT_PD:
3236 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3237 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3238 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3239 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3240 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3241 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3242 return 4;
3243
3244 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3246 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3247 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3248 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3249 case PGMPOOLKIND_64BIT_PML4:
3250 case PGMPOOLKIND_PAE_PDPT:
3251 return 8;
3252
3253 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3254 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3255 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3256 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3257 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3258 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3259 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3260 case PGMPOOLKIND_ROOT_NESTED:
3261 case PGMPOOLKIND_PAE_PD_PHYS:
3262 case PGMPOOLKIND_PAE_PDPT_PHYS:
3263 case PGMPOOLKIND_32BIT_PD_PHYS:
3264 /** @todo can we return 0? (nobody is calling this...) */
3265 AssertFailed();
3266 return 0;
3267
3268 default:
3269 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3270 }
3271}
3272#endif /* unused */
3273
3274
3275/**
3276 * Checks one shadow page table entry for a mapping of a physical page.
3277 *
3278 * @returns true / false indicating removal of all relevant PTEs
3279 *
3280 * @param pVM The cross context VM structure.
3281 * @param pPhysPage The guest page in question.
3282 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3283 * @param iShw The shadow page table.
3284 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3285 */
3286static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3287{
3288 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3289 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3290 bool fRet = false;
3291
3292 /*
3293 * Assert sanity.
3294 */
3295 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3296 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3297 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3298
3299 /*
3300 * Then, clear the actual mappings to the page in the shadow PT.
3301 */
3302 switch (pPage->enmKind)
3303 {
3304 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3306 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3307 {
3308 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3309 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3310 uint32_t u32AndMask = 0;
3311 uint32_t u32OrMask = 0;
3312
3313 if (!fFlushPTEs)
3314 {
3315 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3316 {
3317 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3318 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3319 u32OrMask = X86_PTE_RW;
3320 u32AndMask = UINT32_MAX;
3321 fRet = true;
3322 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3323 break;
3324
3325 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3326 u32OrMask = 0;
3327 u32AndMask = ~X86_PTE_RW;
3328 fRet = true;
3329 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3330 break;
3331 default:
3332 /* (shouldn't be here, will assert below) */
3333 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3334 break;
3335 }
3336 }
3337 else
3338 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3339
3340 /* Update the counter if we're removing references. */
3341 if (!u32AndMask)
3342 {
3343 Assert(pPage->cPresent);
3344 Assert(pPool->cPresent);
3345 pPage->cPresent--;
3346 pPool->cPresent--;
3347 }
3348
3349 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3350 {
3351 X86PTE Pte;
3352
3353 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3354 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3355 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3356 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3357
3358 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3359 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3360 return fRet;
3361 }
3362#ifdef LOG_ENABLED
3363 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3364 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3365 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3366 {
3367 Log(("i=%d cFound=%d\n", i, ++cFound));
3368 }
3369#endif
3370 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3371 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3372 break;
3373 }
3374
3375 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3376 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3377 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3378 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3379 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3380 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3381 {
3382 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3383 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3384 uint64_t u64OrMask = 0;
3385 uint64_t u64AndMask = 0;
3386
3387 if (!fFlushPTEs)
3388 {
3389 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3390 {
3391 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3392 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3393 u64OrMask = X86_PTE_RW;
3394 u64AndMask = UINT64_MAX;
3395 fRet = true;
3396 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3397 break;
3398
3399 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3400 u64OrMask = 0;
3401 u64AndMask = ~(uint64_t)X86_PTE_RW;
3402 fRet = true;
3403 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3404 break;
3405
3406 default:
3407 /* (shouldn't be here, will assert below) */
3408 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3409 break;
3410 }
3411 }
3412 else
3413 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3414
3415 /* Update the counter if we're removing references. */
3416 if (!u64AndMask)
3417 {
3418 Assert(pPage->cPresent);
3419 Assert(pPool->cPresent);
3420 pPage->cPresent--;
3421 pPool->cPresent--;
3422 }
3423
3424 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3425 {
3426 X86PTEPAE Pte;
3427
3428 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3429 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3430 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3431 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3432
3433 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3434 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3435 return fRet;
3436 }
3437#ifdef LOG_ENABLED
3438 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3439 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3440 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3441 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3442 Log(("i=%d cFound=%d\n", i, ++cFound));
3443#endif
3444 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3445 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3446 break;
3447 }
3448
3449#ifdef PGM_WITH_LARGE_PAGES
3450 /* Large page case only. */
3451 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3452 {
3453 Assert(pVM->pgm.s.fNestedPaging);
3454
3455 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3456 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3457
3458 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3459 {
3460 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3461 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3462 pPD->a[iPte].u = 0;
3463 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3464
3465 /* Update the counter as we're removing references. */
3466 Assert(pPage->cPresent);
3467 Assert(pPool->cPresent);
3468 pPage->cPresent--;
3469 pPool->cPresent--;
3470
3471 return fRet;
3472 }
3473# ifdef LOG_ENABLED
3474 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3475 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3476 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3477 Log(("i=%d cFound=%d\n", i, ++cFound));
3478# endif
3479 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3480 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3481 break;
3482 }
3483
3484 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3485 case PGMPOOLKIND_PAE_PD_PHYS:
3486 {
3487 Assert(pVM->pgm.s.fNestedPaging);
3488
3489 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3490 PX86PD pPD = (PX86PD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3491
3492 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3493 {
3494 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3495 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3496 pPD->a[iPte].u = 0;
3497 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3498
3499 /* Update the counter as we're removing references. */
3500 Assert(pPage->cPresent);
3501 Assert(pPool->cPresent);
3502 pPage->cPresent--;
3503 pPool->cPresent--;
3504 return fRet;
3505 }
3506# ifdef LOG_ENABLED
3507 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3508 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3509 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3510 Log(("i=%d cFound=%d\n", i, ++cFound));
3511# endif
3512 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3513 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3514 break;
3515 }
3516#endif /* PGM_WITH_LARGE_PAGES */
3517
3518 default:
3519 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3520 }
3521
3522 /* not reached. */
3523#ifndef _MSC_VER
3524 return fRet;
3525#endif
3526}
3527
3528
3529/**
3530 * Scans one shadow page table for mappings of a physical page.
3531 *
3532 * @param pVM The cross context VM structure.
3533 * @param pPhysPage The guest page in question.
3534 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3535 * @param iShw The shadow page table.
3536 */
3537static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3538{
3539 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3540
3541 /* We should only come here with when there's only one reference to this physical page. */
3542 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3543
3544 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3545 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3546 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3547 if (!fKeptPTEs)
3548 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3549 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3550}
3551
3552
3553/**
3554 * Flushes a list of shadow page tables mapping the same physical page.
3555 *
3556 * @param pVM The cross context VM structure.
3557 * @param pPhysPage The guest page in question.
3558 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3559 * @param iPhysExt The physical cross reference extent list to flush.
3560 */
3561static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3562{
3563 PGM_LOCK_ASSERT_OWNER(pVM);
3564 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3565 bool fKeepList = false;
3566
3567 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3568 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3569
3570 const uint16_t iPhysExtStart = iPhysExt;
3571 PPGMPOOLPHYSEXT pPhysExt;
3572 do
3573 {
3574 Assert(iPhysExt < pPool->cMaxPhysExts);
3575 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3576 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3577 {
3578 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3579 {
3580 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3581 if (!fKeptPTEs)
3582 {
3583 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3584 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3585 }
3586 else
3587 fKeepList = true;
3588 }
3589 }
3590 /* next */
3591 iPhysExt = pPhysExt->iNext;
3592 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3593
3594 if (!fKeepList)
3595 {
3596 /* insert the list into the free list and clear the ram range entry. */
3597 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3598 pPool->iPhysExtFreeHead = iPhysExtStart;
3599 /* Invalidate the tracking data. */
3600 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3601 }
3602
3603 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3604}
3605
3606
3607/**
3608 * Flushes all shadow page table mappings of the given guest page.
3609 *
3610 * This is typically called when the host page backing the guest one has been
3611 * replaced or when the page protection was changed due to a guest access
3612 * caught by the monitoring.
3613 *
3614 * @returns VBox status code.
3615 * @retval VINF_SUCCESS if all references has been successfully cleared.
3616 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3617 * pool cleaning. FF and sync flags are set.
3618 *
3619 * @param pVM The cross context VM structure.
3620 * @param GCPhysPage GC physical address of the page in question
3621 * @param pPhysPage The guest page in question.
3622 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3623 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3624 * flushed, it is NOT touched if this isn't necessary.
3625 * The caller MUST initialized this to @a false.
3626 */
3627int pgmPoolTrackUpdateGCPhys(PVM pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3628{
3629 PVMCPU pVCpu = VMMGetCpu(pVM);
3630 pgmLock(pVM);
3631 int rc = VINF_SUCCESS;
3632
3633#ifdef PGM_WITH_LARGE_PAGES
3634 /* Is this page part of a large page? */
3635 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3636 {
3637 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3638 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3639
3640 /* Fetch the large page base. */
3641 PPGMPAGE pLargePage;
3642 if (GCPhysBase != GCPhysPage)
3643 {
3644 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3645 AssertFatal(pLargePage);
3646 }
3647 else
3648 pLargePage = pPhysPage;
3649
3650 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3651
3652 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3653 {
3654 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3655 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3656 pVM->pgm.s.cLargePagesDisabled++;
3657
3658 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3659 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3660
3661 *pfFlushTLBs = true;
3662 pgmUnlock(pVM);
3663 return rc;
3664 }
3665 }
3666#else
3667 NOREF(GCPhysPage);
3668#endif /* PGM_WITH_LARGE_PAGES */
3669
3670 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3671 if (u16)
3672 {
3673 /*
3674 * The zero page is currently screwing up the tracking and we'll
3675 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3676 * is defined, zero pages won't normally be mapped. Some kind of solution
3677 * will be needed for this problem of course, but it will have to wait...
3678 */
3679 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3680 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3681 rc = VINF_PGM_GCPHYS_ALIASED;
3682 else
3683 {
3684# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC) /** @todo we can drop this now. */
3685 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3686 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3687 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
3688# endif
3689
3690 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3691 {
3692 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3693 pgmPoolTrackFlushGCPhysPT(pVM,
3694 pPhysPage,
3695 fFlushPTEs,
3696 PGMPOOL_TD_GET_IDX(u16));
3697 }
3698 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3699 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3700 else
3701 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3702 *pfFlushTLBs = true;
3703
3704# if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
3705 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
3706# endif
3707 }
3708 }
3709
3710 if (rc == VINF_PGM_GCPHYS_ALIASED)
3711 {
3712 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3713 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3714 rc = VINF_PGM_SYNC_CR3;
3715 }
3716 pgmUnlock(pVM);
3717 return rc;
3718}
3719
3720
3721/**
3722 * Scans all shadow page tables for mappings of a physical page.
3723 *
3724 * This may be slow, but it's most likely more efficient than cleaning
3725 * out the entire page pool / cache.
3726 *
3727 * @returns VBox status code.
3728 * @retval VINF_SUCCESS if all references has been successfully cleared.
3729 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3730 * a page pool cleaning.
3731 *
3732 * @param pVM The cross context VM structure.
3733 * @param pPhysPage The guest page in question.
3734 */
3735int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3736{
3737 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3738 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3739 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3740 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3741
3742 /*
3743 * There is a limit to what makes sense.
3744 */
3745 if ( pPool->cPresent > 1024
3746 && pVM->cCpus == 1)
3747 {
3748 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3749 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3750 return VINF_PGM_GCPHYS_ALIASED;
3751 }
3752
3753 /*
3754 * Iterate all the pages until we've encountered all that in use.
3755 * This is simple but not quite optimal solution.
3756 */
3757 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P; /** @todo drop X86_PTE_P here as we always test if present separately, anyway. */
3758 const uint32_t u32 = u64; /** @todo move into the 32BIT_PT_xx case */
3759 unsigned cLeft = pPool->cUsedPages;
3760 unsigned iPage = pPool->cCurPages;
3761 while (--iPage >= PGMPOOL_IDX_FIRST)
3762 {
3763 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3764 if ( pPage->GCPhys != NIL_RTGCPHYS
3765 && pPage->cPresent)
3766 {
3767 switch (pPage->enmKind)
3768 {
3769 /*
3770 * We only care about shadow page tables.
3771 */
3772 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3773 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3774 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3775 {
3776 unsigned cPresent = pPage->cPresent;
3777 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3778 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3779 if (pPT->a[i].n.u1Present)
3780 {
3781 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3782 {
3783 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3784 pPT->a[i].u = 0;
3785
3786 /* Update the counter as we're removing references. */
3787 Assert(pPage->cPresent);
3788 Assert(pPool->cPresent);
3789 pPage->cPresent--;
3790 pPool->cPresent--;
3791 }
3792 if (!--cPresent)
3793 break;
3794 }
3795 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3796 break;
3797 }
3798
3799 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3800 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3801 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3802 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3803 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3804 {
3805 unsigned cPresent = pPage->cPresent;
3806 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3807 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3808 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3809 {
3810 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3811 {
3812 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3813 PGMSHWPTEPAE_SET(pPT->a[i], 0); /// @todo why not atomic?
3814
3815 /* Update the counter as we're removing references. */
3816 Assert(pPage->cPresent);
3817 Assert(pPool->cPresent);
3818 pPage->cPresent--;
3819 pPool->cPresent--;
3820 }
3821 if (!--cPresent)
3822 break;
3823 }
3824 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3825 break;
3826 }
3827#ifndef IN_RC
3828 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3829 {
3830 unsigned cPresent = pPage->cPresent;
3831 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3832 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3833 if (pPT->a[i].n.u1Present)
3834 {
3835 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3836 {
3837 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3838 pPT->a[i].u = 0;
3839
3840 /* Update the counter as we're removing references. */
3841 Assert(pPage->cPresent);
3842 Assert(pPool->cPresent);
3843 pPage->cPresent--;
3844 pPool->cPresent--;
3845 }
3846 if (!--cPresent)
3847 break;
3848 }
3849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3850 break;
3851 }
3852#endif
3853 }
3854 if (!--cLeft)
3855 break;
3856 }
3857 }
3858
3859 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3860 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3861
3862 /*
3863 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3864 */
3865 if (pPool->cPresent > 1024)
3866 {
3867 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3868 return VINF_PGM_GCPHYS_ALIASED;
3869 }
3870
3871 return VINF_SUCCESS;
3872}
3873
3874
3875/**
3876 * Clears the user entry in a user table.
3877 *
3878 * This is used to remove all references to a page when flushing it.
3879 */
3880static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3881{
3882 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3883 Assert(pUser->iUser < pPool->cCurPages);
3884 uint32_t iUserTable = pUser->iUserTable;
3885
3886 /*
3887 * Map the user page. Ignore references made by fictitious pages.
3888 */
3889 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3890 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3891 union
3892 {
3893 uint64_t *pau64;
3894 uint32_t *pau32;
3895 } u;
3896 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3897 {
3898 Assert(!pUserPage->pvPageR3);
3899 return;
3900 }
3901 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3902
3903
3904 /* Safety precaution in case we change the paging for other modes too in the future. */
3905 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3906
3907#ifdef VBOX_STRICT
3908 /*
3909 * Some sanity checks.
3910 */
3911 switch (pUserPage->enmKind)
3912 {
3913 case PGMPOOLKIND_32BIT_PD:
3914 case PGMPOOLKIND_32BIT_PD_PHYS:
3915 Assert(iUserTable < X86_PG_ENTRIES);
3916 break;
3917 case PGMPOOLKIND_PAE_PDPT:
3918 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3919 case PGMPOOLKIND_PAE_PDPT_PHYS:
3920 Assert(iUserTable < 4);
3921 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3922 break;
3923 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3924 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3925 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3926 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3927 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3928 case PGMPOOLKIND_PAE_PD_PHYS:
3929 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3930 break;
3931 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3932 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3933 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3934 break;
3935 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3936 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3937 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3938 break;
3939 case PGMPOOLKIND_64BIT_PML4:
3940 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3941 /* GCPhys >> PAGE_SHIFT is the index here */
3942 break;
3943 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3944 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3945 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3946 break;
3947
3948 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3949 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3950 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3951 break;
3952
3953 case PGMPOOLKIND_ROOT_NESTED:
3954 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3955 break;
3956
3957 default:
3958 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3959 break;
3960 }
3961#endif /* VBOX_STRICT */
3962
3963 /*
3964 * Clear the entry in the user page.
3965 */
3966 switch (pUserPage->enmKind)
3967 {
3968 /* 32-bit entries */
3969 case PGMPOOLKIND_32BIT_PD:
3970 case PGMPOOLKIND_32BIT_PD_PHYS:
3971 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3972 break;
3973
3974 /* 64-bit entries */
3975 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3976 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3977 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3978 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3979 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3980#ifdef IN_RC
3981 /*
3982 * In 32 bits PAE mode we *must* invalidate the TLB when changing a
3983 * PDPT entry; the CPU fetches them only during cr3 load, so any
3984 * non-present PDPT will continue to cause page faults.
3985 */
3986 ASMReloadCR3();
3987#endif
3988 /* fall thru */
3989 case PGMPOOLKIND_PAE_PD_PHYS:
3990 case PGMPOOLKIND_PAE_PDPT_PHYS:
3991 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3992 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3993 case PGMPOOLKIND_64BIT_PML4:
3994 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3995 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3996 case PGMPOOLKIND_PAE_PDPT:
3997 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3998 case PGMPOOLKIND_ROOT_NESTED:
3999 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4000 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4001 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4002 break;
4003
4004 default:
4005 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4006 }
4007 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4008}
4009
4010
4011/**
4012 * Clears all users of a page.
4013 */
4014static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4015{
4016 /*
4017 * Free all the user records.
4018 */
4019 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4020
4021 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4022 uint16_t i = pPage->iUserHead;
4023 while (i != NIL_PGMPOOL_USER_INDEX)
4024 {
4025 /* Clear enter in user table. */
4026 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4027
4028 /* Free it. */
4029 const uint16_t iNext = paUsers[i].iNext;
4030 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4031 paUsers[i].iNext = pPool->iUserFreeHead;
4032 pPool->iUserFreeHead = i;
4033
4034 /* Next. */
4035 i = iNext;
4036 }
4037 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4038}
4039
4040
4041/**
4042 * Allocates a new physical cross reference extent.
4043 *
4044 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4045 * @param pVM The cross context VM structure.
4046 * @param piPhysExt Where to store the phys ext index.
4047 */
4048PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
4049{
4050 PGM_LOCK_ASSERT_OWNER(pVM);
4051 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4052 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4053 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4054 {
4055 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4056 return NULL;
4057 }
4058 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4059 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4060 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4061 *piPhysExt = iPhysExt;
4062 return pPhysExt;
4063}
4064
4065
4066/**
4067 * Frees a physical cross reference extent.
4068 *
4069 * @param pVM The cross context VM structure.
4070 * @param iPhysExt The extent to free.
4071 */
4072void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
4073{
4074 PGM_LOCK_ASSERT_OWNER(pVM);
4075 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4076 Assert(iPhysExt < pPool->cMaxPhysExts);
4077 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4078 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4079 {
4080 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4081 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4082 }
4083 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4084 pPool->iPhysExtFreeHead = iPhysExt;
4085}
4086
4087
4088/**
4089 * Frees a physical cross reference extent.
4090 *
4091 * @param pVM The cross context VM structure.
4092 * @param iPhysExt The extent to free.
4093 */
4094void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
4095{
4096 PGM_LOCK_ASSERT_OWNER(pVM);
4097 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4098
4099 const uint16_t iPhysExtStart = iPhysExt;
4100 PPGMPOOLPHYSEXT pPhysExt;
4101 do
4102 {
4103 Assert(iPhysExt < pPool->cMaxPhysExts);
4104 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4105 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4106 {
4107 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4108 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4109 }
4110
4111 /* next */
4112 iPhysExt = pPhysExt->iNext;
4113 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4114
4115 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4116 pPool->iPhysExtFreeHead = iPhysExtStart;
4117}
4118
4119
4120/**
4121 * Insert a reference into a list of physical cross reference extents.
4122 *
4123 * @returns The new tracking data for PGMPAGE.
4124 *
4125 * @param pVM The cross context VM structure.
4126 * @param iPhysExt The physical extent index of the list head.
4127 * @param iShwPT The shadow page table index.
4128 * @param iPte Page table entry
4129 *
4130 */
4131static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4132{
4133 PGM_LOCK_ASSERT_OWNER(pVM);
4134 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4135 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4136
4137 /*
4138 * Special common cases.
4139 */
4140 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4141 {
4142 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4143 paPhysExts[iPhysExt].apte[1] = iPte;
4144 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4145 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4146 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4147 }
4148 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4149 {
4150 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4151 paPhysExts[iPhysExt].apte[2] = iPte;
4152 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4153 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4154 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4155 }
4156 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4157
4158 /*
4159 * General treatment.
4160 */
4161 const uint16_t iPhysExtStart = iPhysExt;
4162 unsigned cMax = 15;
4163 for (;;)
4164 {
4165 Assert(iPhysExt < pPool->cMaxPhysExts);
4166 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4167 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4168 {
4169 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4170 paPhysExts[iPhysExt].apte[i] = iPte;
4171 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedMany);
4172 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4173 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4174 }
4175 if (!--cMax)
4176 {
4177 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackOverflows);
4178 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4179 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4180 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4181 }
4182
4183 /* advance */
4184 iPhysExt = paPhysExts[iPhysExt].iNext;
4185 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4186 break;
4187 }
4188
4189 /*
4190 * Add another extent to the list.
4191 */
4192 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4193 if (!pNew)
4194 {
4195 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackNoExtentsLeft);
4196 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4197 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4198 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4199 }
4200 pNew->iNext = iPhysExtStart;
4201 pNew->aidx[0] = iShwPT;
4202 pNew->apte[0] = iPte;
4203 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4204 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4205}
4206
4207
4208/**
4209 * Add a reference to guest physical page where extents are in use.
4210 *
4211 * @returns The new tracking data for PGMPAGE.
4212 *
4213 * @param pVM The cross context VM structure.
4214 * @param pPhysPage Pointer to the aPages entry in the ram range.
4215 * @param u16 The ram range flags (top 16-bits).
4216 * @param iShwPT The shadow page table index.
4217 * @param iPte Page table entry
4218 */
4219uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4220{
4221 pgmLock(pVM);
4222 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4223 {
4224 /*
4225 * Convert to extent list.
4226 */
4227 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4228 uint16_t iPhysExt;
4229 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4230 if (pPhysExt)
4231 {
4232 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4233 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliased);
4234 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4235 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4236 pPhysExt->aidx[1] = iShwPT;
4237 pPhysExt->apte[1] = iPte;
4238 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4239 }
4240 else
4241 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4242 }
4243 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4244 {
4245 /*
4246 * Insert into the extent list.
4247 */
4248 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4249 }
4250 else
4251 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatTrackAliasedLots);
4252 pgmUnlock(pVM);
4253 return u16;
4254}
4255
4256
4257/**
4258 * Clear references to guest physical memory.
4259 *
4260 * @param pPool The pool.
4261 * @param pPage The page.
4262 * @param pPhysPage Pointer to the aPages entry in the ram range.
4263 * @param iPte Shadow PTE index
4264 */
4265void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4266{
4267 PVM pVM = pPool->CTX_SUFF(pVM);
4268 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4269 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4270
4271 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4272 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4273 {
4274 pgmLock(pVM);
4275
4276 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4277 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4278 do
4279 {
4280 Assert(iPhysExt < pPool->cMaxPhysExts);
4281
4282 /*
4283 * Look for the shadow page and check if it's all freed.
4284 */
4285 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4286 {
4287 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4288 && paPhysExts[iPhysExt].apte[i] == iPte)
4289 {
4290 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4291 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4292
4293 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4294 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4295 {
4296 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4297 pgmUnlock(pVM);
4298 return;
4299 }
4300
4301 /* we can free the node. */
4302 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4303 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4304 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4305 {
4306 /* lonely node */
4307 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4308 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4309 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4310 }
4311 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4312 {
4313 /* head */
4314 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4315 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4316 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4317 }
4318 else
4319 {
4320 /* in list */
4321 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4322 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4323 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4324 }
4325 iPhysExt = iPhysExtNext;
4326 pgmUnlock(pVM);
4327 return;
4328 }
4329 }
4330
4331 /* next */
4332 iPhysExtPrev = iPhysExt;
4333 iPhysExt = paPhysExts[iPhysExt].iNext;
4334 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4335
4336 pgmUnlock(pVM);
4337 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4338 }
4339 else /* nothing to do */
4340 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4341}
4342
4343/**
4344 * Clear references to guest physical memory.
4345 *
4346 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4347 * physical address is assumed to be correct, so the linear search can be
4348 * skipped and we can assert at an earlier point.
4349 *
4350 * @param pPool The pool.
4351 * @param pPage The page.
4352 * @param HCPhys The host physical address corresponding to the guest page.
4353 * @param GCPhys The guest physical address corresponding to HCPhys.
4354 * @param iPte Shadow PTE index
4355 */
4356static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4357{
4358 /*
4359 * Lookup the page and check if it checks out before derefing it.
4360 */
4361 PVM pVM = pPool->CTX_SUFF(pVM);
4362 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4363 if (pPhysPage)
4364 {
4365 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4366#ifdef LOG_ENABLED
4367 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4368 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4369#endif
4370 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4371 {
4372 Assert(pPage->cPresent);
4373 Assert(pPool->cPresent);
4374 pPage->cPresent--;
4375 pPool->cPresent--;
4376 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4377 return;
4378 }
4379
4380 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4381 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4382 }
4383 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4384}
4385
4386
4387/**
4388 * Clear references to guest physical memory.
4389 *
4390 * @param pPool The pool.
4391 * @param pPage The page.
4392 * @param HCPhys The host physical address corresponding to the guest page.
4393 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4394 * @param iPte Shadow pte index
4395 */
4396void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4397{
4398 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4399
4400 /*
4401 * Try the hint first.
4402 */
4403 RTHCPHYS HCPhysHinted;
4404 PVM pVM = pPool->CTX_SUFF(pVM);
4405 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4406 if (pPhysPage)
4407 {
4408 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4409 Assert(HCPhysHinted);
4410 if (HCPhysHinted == HCPhys)
4411 {
4412 Assert(pPage->cPresent);
4413 Assert(pPool->cPresent);
4414 pPage->cPresent--;
4415 pPool->cPresent--;
4416 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4417 return;
4418 }
4419 }
4420 else
4421 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4422
4423 /*
4424 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4425 */
4426 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4427 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4428 while (pRam)
4429 {
4430 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4431 while (iPage-- > 0)
4432 {
4433 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4434 {
4435 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4436 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4437 Assert(pPage->cPresent);
4438 Assert(pPool->cPresent);
4439 pPage->cPresent--;
4440 pPool->cPresent--;
4441 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4442 return;
4443 }
4444 }
4445 pRam = pRam->CTX_SUFF(pNext);
4446 }
4447
4448 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4449}
4450
4451
4452/**
4453 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4454 *
4455 * @param pPool The pool.
4456 * @param pPage The page.
4457 * @param pShwPT The shadow page table (mapping of the page).
4458 * @param pGstPT The guest page table.
4459 */
4460DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4461{
4462 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4463 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4464 {
4465 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4466 if (pShwPT->a[i].n.u1Present)
4467 {
4468 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4469 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4470 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4471 if (!pPage->cPresent)
4472 break;
4473 }
4474 }
4475}
4476
4477
4478/**
4479 * Clear references to guest physical memory in a PAE / 32-bit page table.
4480 *
4481 * @param pPool The pool.
4482 * @param pPage The page.
4483 * @param pShwPT The shadow page table (mapping of the page).
4484 * @param pGstPT The guest page table (just a half one).
4485 */
4486DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4487{
4488 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4489 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4490 {
4491 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4492 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4493 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4494 {
4495 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4496 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4497 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4498 if (!pPage->cPresent)
4499 break;
4500 }
4501 }
4502}
4503
4504
4505/**
4506 * Clear references to guest physical memory in a PAE / PAE page table.
4507 *
4508 * @param pPool The pool.
4509 * @param pPage The page.
4510 * @param pShwPT The shadow page table (mapping of the page).
4511 * @param pGstPT The guest page table.
4512 */
4513DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4514{
4515 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4516 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4517 {
4518 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4519 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4520 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4521 {
4522 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4523 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4524 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4525 if (!pPage->cPresent)
4526 break;
4527 }
4528 }
4529}
4530
4531
4532/**
4533 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4534 *
4535 * @param pPool The pool.
4536 * @param pPage The page.
4537 * @param pShwPT The shadow page table (mapping of the page).
4538 */
4539DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4540{
4541 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4542 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4543 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4544 {
4545 Assert(!(pShwPT->a[i].u & RT_BIT_32(10)));
4546 if (pShwPT->a[i].n.u1Present)
4547 {
4548 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4549 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4550 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4551 if (!pPage->cPresent)
4552 break;
4553 }
4554 }
4555}
4556
4557
4558/**
4559 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4560 *
4561 * @param pPool The pool.
4562 * @param pPage The page.
4563 * @param pShwPT The shadow page table (mapping of the page).
4564 */
4565DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4566{
4567 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4568 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4569 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4570 {
4571 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4572 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4573 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4574 {
4575 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4576 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4577 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4578 if (!pPage->cPresent)
4579 break;
4580 }
4581 }
4582}
4583
4584
4585/**
4586 * Clear references to shadowed pages in an EPT page table.
4587 *
4588 * @param pPool The pool.
4589 * @param pPage The page.
4590 * @param pShwPT The shadow page directory pointer table (mapping of the
4591 * page).
4592 */
4593DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4594{
4595 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4596 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4597 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4598 {
4599 Assert((pShwPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4600 if (pShwPT->a[i].n.u1Present)
4601 {
4602 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4603 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4604 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4605 if (!pPage->cPresent)
4606 break;
4607 }
4608 }
4609}
4610
4611
4612/**
4613 * Clear references to shadowed pages in a 32 bits page directory.
4614 *
4615 * @param pPool The pool.
4616 * @param pPage The page.
4617 * @param pShwPD The shadow page directory (mapping of the page).
4618 */
4619DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4620{
4621 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4622 {
4623 if ( pShwPD->a[i].n.u1Present
4624 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4625 )
4626 {
4627 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4628 if (pSubPage)
4629 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4630 else
4631 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4632 }
4633 }
4634}
4635
4636
4637/**
4638 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4639 *
4640 * @param pPool The pool.
4641 * @param pPage The page.
4642 * @param pShwPD The shadow page directory (mapping of the page).
4643 */
4644DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4645{
4646 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4647 {
4648 if ( pShwPD->a[i].n.u1Present
4649 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING))
4650 {
4651#ifdef PGM_WITH_LARGE_PAGES
4652 if (pShwPD->a[i].b.u1Size)
4653 {
4654 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4655 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4656 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4657 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4658 i);
4659 }
4660 else
4661#endif
4662 {
4663 Assert((pShwPD->a[i].u & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4664 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4665 if (pSubPage)
4666 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4667 else
4668 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4669 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4670 }
4671 }
4672 }
4673}
4674
4675
4676/**
4677 * Clear references to shadowed pages in a PAE page directory pointer table.
4678 *
4679 * @param pPool The pool.
4680 * @param pPage The page.
4681 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4682 */
4683DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4684{
4685 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4686 {
4687 Assert((pShwPDPT->a[i].u & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4688 if ( pShwPDPT->a[i].n.u1Present
4689 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4690 )
4691 {
4692 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4693 if (pSubPage)
4694 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4695 else
4696 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4697 }
4698 }
4699}
4700
4701
4702/**
4703 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4704 *
4705 * @param pPool The pool.
4706 * @param pPage The page.
4707 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4708 */
4709DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4710{
4711 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4712 {
4713 Assert((pShwPDPT->a[i].u & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4714 if (pShwPDPT->a[i].n.u1Present)
4715 {
4716 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4717 if (pSubPage)
4718 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4719 else
4720 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4721 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4722 }
4723 }
4724}
4725
4726
4727/**
4728 * Clear references to shadowed pages in a 64-bit level 4 page table.
4729 *
4730 * @param pPool The pool.
4731 * @param pPage The page.
4732 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4733 */
4734DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4735{
4736 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4737 {
4738 Assert((pShwPML4->a[i].u & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4739 if (pShwPML4->a[i].n.u1Present)
4740 {
4741 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4742 if (pSubPage)
4743 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4744 else
4745 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4746 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4747 }
4748 }
4749}
4750
4751
4752/**
4753 * Clear references to shadowed pages in an EPT page directory.
4754 *
4755 * @param pPool The pool.
4756 * @param pPage The page.
4757 * @param pShwPD The shadow page directory (mapping of the page).
4758 */
4759DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4760{
4761 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4762 {
4763 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4764 if (pShwPD->a[i].n.u1Present)
4765 {
4766#ifdef PGM_WITH_LARGE_PAGES
4767 if (pShwPD->a[i].b.u1Size)
4768 {
4769 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4770 i, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4771 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPD->a[i].u & X86_PDE2M_PAE_PG_MASK,
4772 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4773 i);
4774 }
4775 else
4776#endif
4777 {
4778 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4779 if (pSubPage)
4780 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4781 else
4782 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4783 }
4784 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4785 }
4786 }
4787}
4788
4789
4790/**
4791 * Clear references to shadowed pages in an EPT page directory pointer table.
4792 *
4793 * @param pPool The pool.
4794 * @param pPage The page.
4795 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4796 */
4797DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4798{
4799 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4800 {
4801 Assert((pShwPDPT->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
4802 if (pShwPDPT->a[i].n.u1Present)
4803 {
4804 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4805 if (pSubPage)
4806 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4807 else
4808 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4809 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4810 }
4811 }
4812}
4813
4814
4815/**
4816 * Clears all references made by this page.
4817 *
4818 * This includes other shadow pages and GC physical addresses.
4819 *
4820 * @param pPool The pool.
4821 * @param pPage The page.
4822 */
4823static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4824{
4825 /*
4826 * Map the shadow page and take action according to the page kind.
4827 */
4828 PVM pVM = pPool->CTX_SUFF(pVM);
4829 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4830 switch (pPage->enmKind)
4831 {
4832 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4833 {
4834 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4835 void *pvGst;
4836 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4837 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4838 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4839 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4840 break;
4841 }
4842
4843 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4844 {
4845 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4846 void *pvGst;
4847 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4848 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4850 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4851 break;
4852 }
4853
4854 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4855 {
4856 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4857 void *pvGst;
4858 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4859 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4860 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4861 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4862 break;
4863 }
4864
4865 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4866 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4867 {
4868 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4869 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4870 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4871 break;
4872 }
4873
4874 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4875 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4876 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4877 {
4878 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4879 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4880 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4881 break;
4882 }
4883
4884 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4885 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4886 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4887 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4888 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4889 case PGMPOOLKIND_PAE_PD_PHYS:
4890 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4891 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4892 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4893 break;
4894
4895 case PGMPOOLKIND_32BIT_PD_PHYS:
4896 case PGMPOOLKIND_32BIT_PD:
4897 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4898 break;
4899
4900 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4901 case PGMPOOLKIND_PAE_PDPT:
4902 case PGMPOOLKIND_PAE_PDPT_PHYS:
4903 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4904 break;
4905
4906 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4907 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4908 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4909 break;
4910
4911 case PGMPOOLKIND_64BIT_PML4:
4912 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4913 break;
4914
4915 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4916 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4917 break;
4918
4919 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4920 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4921 break;
4922
4923 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4924 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4925 break;
4926
4927 default:
4928 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4929 }
4930
4931 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
4932 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4933 ASMMemZeroPage(pvShw);
4934 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4935 pPage->fZeroed = true;
4936 Assert(!pPage->cPresent);
4937 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4938}
4939
4940
4941/**
4942 * Flushes a pool page.
4943 *
4944 * This moves the page to the free list after removing all user references to it.
4945 *
4946 * @returns VBox status code.
4947 * @retval VINF_SUCCESS on success.
4948 * @param pPool The pool.
4949 * @param pPage The shadow page.
4950 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
4951 */
4952int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4953{
4954 PVM pVM = pPool->CTX_SUFF(pVM);
4955 bool fFlushRequired = false;
4956
4957 int rc = VINF_SUCCESS;
4958 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4959 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4960 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4961
4962 /*
4963 * Reject any attempts at flushing any of the special root pages (shall
4964 * not happen).
4965 */
4966 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4967 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4968 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4969 VINF_SUCCESS);
4970
4971 pgmLock(pVM);
4972
4973 /*
4974 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4975 */
4976 if (pgmPoolIsPageLocked(pPage))
4977 {
4978 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4979 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4980 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4981 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4982 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4983 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4984 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4985 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4986 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4987 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4988 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4989 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4990 pgmUnlock(pVM);
4991 return VINF_SUCCESS;
4992 }
4993
4994#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
4995 /* Start a subset so we won't run out of mapping space. */
4996 PVMCPU pVCpu = VMMGetCpu(pVM);
4997 uint32_t iPrevSubset = PGMRZDynMapPushAutoSubset(pVCpu);
4998#endif
4999
5000 /*
5001 * Mark the page as being in need of an ASMMemZeroPage().
5002 */
5003 pPage->fZeroed = false;
5004
5005#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5006 if (pPage->fDirty)
5007 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5008#endif
5009
5010 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5011 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5012 fFlushRequired = true;
5013
5014 /*
5015 * Clear the page.
5016 */
5017 pgmPoolTrackClearPageUsers(pPool, pPage);
5018 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5019 pgmPoolTrackDeref(pPool, pPage);
5020 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5021
5022 /*
5023 * Flush it from the cache.
5024 */
5025 pgmPoolCacheFlushPage(pPool, pPage);
5026
5027#if defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0) || defined(IN_RC)
5028 /* Heavy stuff done. */
5029 PGMRZDynMapPopAutoSubset(pVCpu, iPrevSubset);
5030#endif
5031
5032 /*
5033 * Deregistering the monitoring.
5034 */
5035 if (pPage->fMonitored)
5036 rc = pgmPoolMonitorFlush(pPool, pPage);
5037
5038 /*
5039 * Free the page.
5040 */
5041 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5042 pPage->iNext = pPool->iFreeHead;
5043 pPool->iFreeHead = pPage->idx;
5044 pPage->enmKind = PGMPOOLKIND_FREE;
5045 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5046 pPage->GCPhys = NIL_RTGCPHYS;
5047 pPage->fReusedFlushPending = false;
5048
5049 pPool->cUsedPages--;
5050
5051 /* Flush the TLBs of all VCPUs if required. */
5052 if ( fFlushRequired
5053 && fFlush)
5054 {
5055 PGM_INVL_ALL_VCPU_TLBS(pVM);
5056 }
5057
5058 pgmUnlock(pVM);
5059 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5060 return rc;
5061}
5062
5063
5064/**
5065 * Frees a usage of a pool page.
5066 *
5067 * The caller is responsible to updating the user table so that it no longer
5068 * references the shadow page.
5069 *
5070 * @param pPool The pool.
5071 * @param pPage The shadow page.
5072 * @param iUser The shadow page pool index of the user table.
5073 * NIL_PGMPOOL_IDX for root pages.
5074 * @param iUserTable The index into the user table (shadowed). Ignored if
5075 * root page.
5076 */
5077void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5078{
5079 PVM pVM = pPool->CTX_SUFF(pVM);
5080
5081 STAM_PROFILE_START(&pPool->StatFree, a);
5082 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5083 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5084 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5085
5086 pgmLock(pVM);
5087 if (iUser != NIL_PGMPOOL_IDX)
5088 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5089 if (!pPage->fCached)
5090 pgmPoolFlushPage(pPool, pPage);
5091 pgmUnlock(pVM);
5092 STAM_PROFILE_STOP(&pPool->StatFree, a);
5093}
5094
5095
5096/**
5097 * Makes one or more free page free.
5098 *
5099 * @returns VBox status code.
5100 * @retval VINF_SUCCESS on success.
5101 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5102 *
5103 * @param pPool The pool.
5104 * @param enmKind Page table kind
5105 * @param iUser The user of the page.
5106 */
5107static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5108{
5109 PVM pVM = pPool->CTX_SUFF(pVM);
5110 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5111 NOREF(enmKind);
5112
5113 /*
5114 * If the pool isn't full grown yet, expand it.
5115 */
5116 if ( pPool->cCurPages < pPool->cMaxPages
5117#if defined(IN_RC)
5118 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
5119 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5120 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
5121#endif
5122 )
5123 {
5124 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5125#ifdef IN_RING3
5126 int rc = PGMR3PoolGrow(pVM);
5127#else
5128 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
5129#endif
5130 if (RT_FAILURE(rc))
5131 return rc;
5132 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5133 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5134 return VINF_SUCCESS;
5135 }
5136
5137 /*
5138 * Free one cached page.
5139 */
5140 return pgmPoolCacheFreeOne(pPool, iUser);
5141}
5142
5143
5144/**
5145 * Allocates a page from the pool.
5146 *
5147 * This page may actually be a cached page and not in need of any processing
5148 * on the callers part.
5149 *
5150 * @returns VBox status code.
5151 * @retval VINF_SUCCESS if a NEW page was allocated.
5152 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5153 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
5154 *
5155 * @param pVM The cross context VM structure.
5156 * @param GCPhys The GC physical address of the page we're gonna shadow.
5157 * For 4MB and 2MB PD entries, it's the first address the
5158 * shadow PT is covering.
5159 * @param enmKind The kind of mapping.
5160 * @param enmAccess Access type for the mapping (only relevant for big pages)
5161 * @param fA20Enabled Whether the A20 gate is enabled or not.
5162 * @param iUser The shadow page pool index of the user table. Root
5163 * pages should pass NIL_PGMPOOL_IDX.
5164 * @param iUserTable The index into the user table (shadowed). Ignored for
5165 * root pages (iUser == NIL_PGMPOOL_IDX).
5166 * @param fLockPage Lock the page
5167 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5168 */
5169int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5170 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5171{
5172 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5173 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5174 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5175 *ppPage = NULL;
5176 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5177 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5178 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5179
5180 pgmLock(pVM);
5181
5182 if (pPool->fCacheEnabled)
5183 {
5184 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5185 if (RT_SUCCESS(rc2))
5186 {
5187 if (fLockPage)
5188 pgmPoolLockPage(pPool, *ppPage);
5189 pgmUnlock(pVM);
5190 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5191 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5192 return rc2;
5193 }
5194 }
5195
5196 /*
5197 * Allocate a new one.
5198 */
5199 int rc = VINF_SUCCESS;
5200 uint16_t iNew = pPool->iFreeHead;
5201 if (iNew == NIL_PGMPOOL_IDX)
5202 {
5203 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5204 if (RT_FAILURE(rc))
5205 {
5206 pgmUnlock(pVM);
5207 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5208 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5209 return rc;
5210 }
5211 iNew = pPool->iFreeHead;
5212 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_PGM_POOL_IPE);
5213 }
5214
5215 /* unlink the free head */
5216 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5217 pPool->iFreeHead = pPage->iNext;
5218 pPage->iNext = NIL_PGMPOOL_IDX;
5219
5220 /*
5221 * Initialize it.
5222 */
5223 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5224 pPage->enmKind = enmKind;
5225 pPage->enmAccess = enmAccess;
5226 pPage->GCPhys = GCPhys;
5227 pPage->fA20Enabled = fA20Enabled;
5228 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5229 pPage->fMonitored = false;
5230 pPage->fCached = false;
5231 pPage->fDirty = false;
5232 pPage->fReusedFlushPending = false;
5233 pPage->cModifications = 0;
5234 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5235 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5236 pPage->cPresent = 0;
5237 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5238 pPage->idxDirtyEntry = 0;
5239 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5240 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5241 pPage->cLastAccessHandler = 0;
5242 pPage->cLocked = 0;
5243# ifdef VBOX_STRICT
5244 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5245# endif
5246
5247 /*
5248 * Insert into the tracking and cache. If this fails, free the page.
5249 */
5250 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5251 if (RT_FAILURE(rc3))
5252 {
5253 pPool->cUsedPages--;
5254 pPage->enmKind = PGMPOOLKIND_FREE;
5255 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5256 pPage->GCPhys = NIL_RTGCPHYS;
5257 pPage->iNext = pPool->iFreeHead;
5258 pPool->iFreeHead = pPage->idx;
5259 pgmUnlock(pVM);
5260 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5261 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5262 return rc3;
5263 }
5264
5265 /*
5266 * Commit the allocation, clear the page and return.
5267 */
5268#ifdef VBOX_WITH_STATISTICS
5269 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5270 pPool->cUsedPagesHigh = pPool->cUsedPages;
5271#endif
5272
5273 if (!pPage->fZeroed)
5274 {
5275 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5276 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5277 ASMMemZeroPage(pv);
5278 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5279 }
5280
5281 *ppPage = pPage;
5282 if (fLockPage)
5283 pgmPoolLockPage(pPool, pPage);
5284 pgmUnlock(pVM);
5285 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5286 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5287 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5288 return rc;
5289}
5290
5291
5292/**
5293 * Frees a usage of a pool page.
5294 *
5295 * @param pVM The cross context VM structure.
5296 * @param HCPhys The HC physical address of the shadow page.
5297 * @param iUser The shadow page pool index of the user table.
5298 * NIL_PGMPOOL_IDX if root page.
5299 * @param iUserTable The index into the user table (shadowed). Ignored if
5300 * root page.
5301 */
5302void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5303{
5304 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5305 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5306 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5307}
5308
5309
5310/**
5311 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5312 *
5313 * @returns Pointer to the shadow page structure.
5314 * @param pPool The pool.
5315 * @param HCPhys The HC physical address of the shadow page.
5316 */
5317PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5318{
5319 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5320
5321 /*
5322 * Look up the page.
5323 */
5324 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5325
5326 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5327 return pPage;
5328}
5329
5330
5331/**
5332 * Internal worker for finding a page for debugging purposes, no assertions.
5333 *
5334 * @returns Pointer to the shadow page structure. NULL on if not found.
5335 * @param pPool The pool.
5336 * @param HCPhys The HC physical address of the shadow page.
5337 */
5338PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5339{
5340 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5341 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5342}
5343
5344#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5345
5346/**
5347 * Flush the specified page if present
5348 *
5349 * @param pVM The cross context VM structure.
5350 * @param GCPhys Guest physical address of the page to flush
5351 */
5352void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5353{
5354 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5355
5356 VM_ASSERT_EMT(pVM);
5357
5358 /*
5359 * Look up the GCPhys in the hash.
5360 */
5361 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5362 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5363 if (i == NIL_PGMPOOL_IDX)
5364 return;
5365
5366 do
5367 {
5368 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5369 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5370 {
5371 switch (pPage->enmKind)
5372 {
5373 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5374 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5375 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5376 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5377 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5378 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5379 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5380 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5381 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5382 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5383 case PGMPOOLKIND_64BIT_PML4:
5384 case PGMPOOLKIND_32BIT_PD:
5385 case PGMPOOLKIND_PAE_PDPT:
5386 {
5387 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5388#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5389 if (pPage->fDirty)
5390 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5391 else
5392#endif
5393 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5394 Assert(!pgmPoolIsPageLocked(pPage));
5395 pgmPoolMonitorChainFlush(pPool, pPage);
5396 return;
5397 }
5398
5399 /* ignore, no monitoring. */
5400 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5401 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5402 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5403 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5404 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5405 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5406 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5407 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5408 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5409 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5410 case PGMPOOLKIND_ROOT_NESTED:
5411 case PGMPOOLKIND_PAE_PD_PHYS:
5412 case PGMPOOLKIND_PAE_PDPT_PHYS:
5413 case PGMPOOLKIND_32BIT_PD_PHYS:
5414 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5415 break;
5416
5417 default:
5418 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5419 }
5420 }
5421
5422 /* next */
5423 i = pPage->iNext;
5424 } while (i != NIL_PGMPOOL_IDX);
5425 return;
5426}
5427
5428#endif /* IN_RING3 */
5429#ifdef IN_RING3
5430
5431/**
5432 * Reset CPU on hot plugging.
5433 *
5434 * @param pVM The cross context VM structure.
5435 * @param pVCpu The cross context virtual CPU structure.
5436 */
5437void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5438{
5439 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5440
5441 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5442 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5443 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5444}
5445
5446
5447/**
5448 * Flushes the entire cache.
5449 *
5450 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5451 * this and execute this CR3 flush.
5452 *
5453 * @param pVM The cross context VM structure.
5454 */
5455void pgmR3PoolReset(PVM pVM)
5456{
5457 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5458
5459 PGM_LOCK_ASSERT_OWNER(pVM);
5460 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5461 LogFlow(("pgmR3PoolReset:\n"));
5462
5463 /*
5464 * If there are no pages in the pool, there is nothing to do.
5465 */
5466 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5467 {
5468 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5469 return;
5470 }
5471
5472 /*
5473 * Exit the shadow mode since we're going to clear everything,
5474 * including the root page.
5475 */
5476 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5477 pgmR3ExitShadowModeBeforePoolFlush(&pVM->aCpus[i]);
5478
5479 /*
5480 * Nuke the free list and reinsert all pages into it.
5481 */
5482 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5483 {
5484 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5485
5486 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
5487 if (pPage->fMonitored)
5488 pgmPoolMonitorFlush(pPool, pPage);
5489 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5490 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5491 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5492 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5493 pPage->GCPhys = NIL_RTGCPHYS;
5494 pPage->enmKind = PGMPOOLKIND_FREE;
5495 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5496 Assert(pPage->idx == i);
5497 pPage->iNext = i + 1;
5498 pPage->fA20Enabled = true;
5499 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5500 pPage->fSeenNonGlobal = false;
5501 pPage->fMonitored = false;
5502 pPage->fDirty = false;
5503 pPage->fCached = false;
5504 pPage->fReusedFlushPending = false;
5505 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5506 pPage->cPresent = 0;
5507 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5508 pPage->cModifications = 0;
5509 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5510 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5511 pPage->idxDirtyEntry = 0;
5512 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5513 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5514 pPage->cLastAccessHandler = 0;
5515 pPage->cLocked = 0;
5516#ifdef VBOX_STRICT
5517 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5518#endif
5519 }
5520 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5521 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5522 pPool->cUsedPages = 0;
5523
5524 /*
5525 * Zap and reinitialize the user records.
5526 */
5527 pPool->cPresent = 0;
5528 pPool->iUserFreeHead = 0;
5529 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5530 const unsigned cMaxUsers = pPool->cMaxUsers;
5531 for (unsigned i = 0; i < cMaxUsers; i++)
5532 {
5533 paUsers[i].iNext = i + 1;
5534 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5535 paUsers[i].iUserTable = 0xfffffffe;
5536 }
5537 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5538
5539 /*
5540 * Clear all the GCPhys links and rebuild the phys ext free list.
5541 */
5542 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5543 pRam;
5544 pRam = pRam->CTX_SUFF(pNext))
5545 {
5546 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5547 while (iPage-- > 0)
5548 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5549 }
5550
5551 pPool->iPhysExtFreeHead = 0;
5552 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5553 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5554 for (unsigned i = 0; i < cMaxPhysExts; i++)
5555 {
5556 paPhysExts[i].iNext = i + 1;
5557 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5558 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5559 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5560 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5561 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5562 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5563 }
5564 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5565
5566 /*
5567 * Just zap the modified list.
5568 */
5569 pPool->cModifiedPages = 0;
5570 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5571
5572 /*
5573 * Clear the GCPhys hash and the age list.
5574 */
5575 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5576 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5577 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5578 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5579
5580#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5581 /* Clear all dirty pages. */
5582 pPool->idxFreeDirtyPage = 0;
5583 pPool->cDirtyPages = 0;
5584 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
5585 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
5586#endif
5587
5588 /*
5589 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5590 */
5591 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5592 {
5593 /*
5594 * Re-enter the shadowing mode and assert Sync CR3 FF.
5595 */
5596 PVMCPU pVCpu = &pVM->aCpus[i];
5597 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5598 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5599 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5600 }
5601
5602 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5603}
5604
5605#endif /* IN_RING3 */
5606
5607#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5608/**
5609 * Stringifies a PGMPOOLKIND value.
5610 */
5611static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5612{
5613 switch ((PGMPOOLKIND)enmKind)
5614 {
5615 case PGMPOOLKIND_INVALID:
5616 return "PGMPOOLKIND_INVALID";
5617 case PGMPOOLKIND_FREE:
5618 return "PGMPOOLKIND_FREE";
5619 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5620 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5621 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5622 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5623 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5624 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5625 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5626 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5627 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5628 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5629 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5630 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5631 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5632 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5633 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5634 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5635 case PGMPOOLKIND_32BIT_PD:
5636 return "PGMPOOLKIND_32BIT_PD";
5637 case PGMPOOLKIND_32BIT_PD_PHYS:
5638 return "PGMPOOLKIND_32BIT_PD_PHYS";
5639 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5640 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5641 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5642 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5643 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5644 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5645 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5646 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5647 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5648 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5649 case PGMPOOLKIND_PAE_PD_PHYS:
5650 return "PGMPOOLKIND_PAE_PD_PHYS";
5651 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5652 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5653 case PGMPOOLKIND_PAE_PDPT:
5654 return "PGMPOOLKIND_PAE_PDPT";
5655 case PGMPOOLKIND_PAE_PDPT_PHYS:
5656 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5657 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5658 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5659 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5660 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5661 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5662 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5663 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5664 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5665 case PGMPOOLKIND_64BIT_PML4:
5666 return "PGMPOOLKIND_64BIT_PML4";
5667 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5668 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5669 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5670 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5671 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5672 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5673 case PGMPOOLKIND_ROOT_NESTED:
5674 return "PGMPOOLKIND_ROOT_NESTED";
5675 }
5676 return "Unknown kind!";
5677}
5678#endif /* LOG_ENABLED || VBOX_STRICT */
5679
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette