VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 96966

Last change on this file since 96966 was 96966, checked in by vboxsync, 2 years ago

VMM/PGM: Nested VMX: bugref:10092 Remove unused function used early during debugging.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 221.8 KB
Line 
1/* $Id: PGMAllPool.cpp 96966 2022-10-03 12:09:15Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/disopcode.h>
42#include <VBox/vmm/hm_vmx.h>
43
44#include <VBox/log.h>
45#include <VBox/err.h>
46#include <iprt/asm.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Internal Functions *
52*********************************************************************************************************************************/
53RT_C_DECLS_BEGIN
54#if 0 /* unused */
55DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
56DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
57#endif /* unused */
58static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
63static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
64#endif
65#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
66static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
67#endif
68
69int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
70PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
71void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
72void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
73
74RT_C_DECLS_END
75
76
#if 0 /* unused */
/**
 * Tells whether a pool page kind shadows a big (4MB or 2MB) guest page.
 *
 * @returns true for the three big-page kinds, false for every other kind.
 * @param   enmKind     The page kind to classify.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    return enmKind == PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB
        || enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB
        || enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_2MB;
}
#endif /* unused */
97
98
99/**
100 * Flushes a chain of pages sharing the same access monitor.
101 *
102 * @param pPool The pool.
103 * @param pPage A page in the chain.
104 */
105void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
106{
107 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
108
109 /*
110 * Find the list head.
111 */
112 uint16_t idx = pPage->idx;
113 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
114 {
115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
116 {
117 idx = pPage->iMonitoredPrev;
118 Assert(idx != pPage->idx);
119 pPage = &pPool->aPages[idx];
120 }
121 }
122
123 /*
124 * Iterate the list flushing each shadow page.
125 */
126 for (;;)
127 {
128 idx = pPage->iMonitoredNext;
129 Assert(idx != pPage->idx);
130 if (pPage->idx >= PGMPOOL_IDX_FIRST)
131 {
132 int rc2 = pgmPoolFlushPage(pPool, pPage);
133 AssertRC(rc2);
134 }
135 /* next */
136 if (idx == NIL_PGMPOOL_IDX)
137 break;
138 pPage = &pPool->aPages[idx];
139 }
140}
141
142
143/**
144 * Wrapper for getting the current context pointer to the entry being modified.
145 *
146 * @returns VBox status code suitable for scheduling.
147 * @param pVM The cross context VM structure.
148 * @param pvDst Destination address
149 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
150 * on the context (e.g. \#PF in R0 & RC).
151 * @param GCPhysSrc The source guest physical address.
152 * @param cb Size of data to read
153 */
154DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
155{
156#if defined(IN_RING3)
157 NOREF(pVM); NOREF(GCPhysSrc);
158 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
159 return VINF_SUCCESS;
160#else
161 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
162 NOREF(pvSrc);
163 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
164#endif
165}
166
167
168/**
169 * Process shadow entries before they are changed by the guest.
170 *
171 * For PT entries we will clear them. For PD entries, we'll simply check
172 * for mapping conflicts and set the SyncCR3 FF if found.
173 *
174 * @param pVCpu The cross context virtual CPU structure.
175 * @param pPool The pool.
176 * @param pPage The head page.
177 * @param GCPhysFault The guest physical fault address.
178 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
179 * depending on the context (e.g. \#PF in R0 & RC).
180 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
181 */
182static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
183 void const *pvAddress, unsigned cbWrite)
184{
185 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
186 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
187 PVMCC pVM = pPool->CTX_SUFF(pVM);
188 NOREF(pVCpu);
189
190 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
191 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
192
193 if (PGMPOOL_PAGE_IS_NESTED(pPage))
194 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
195
196 for (;;)
197 {
198 union
199 {
200 void *pv;
201 PX86PT pPT;
202 PPGMSHWPTPAE pPTPae;
203 PX86PD pPD;
204 PX86PDPAE pPDPae;
205 PX86PDPT pPDPT;
206 PX86PML4 pPML4;
207#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
208 PEPTPDPT pEptPdpt;
209 PEPTPD pEptPd;
210 PEPTPT pEptPt;
211#endif
212 } uShw;
213
214 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
215 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
216
217 uShw.pv = NULL;
218 switch (pPage->enmKind)
219 {
220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
221 {
222 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
223 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
224 const unsigned iShw = off / sizeof(X86PTE);
225 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
226 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
227 if (uPde & X86_PTE_P)
228 {
229 X86PTE GstPte;
230 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
231 AssertRC(rc);
232 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
233 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
234 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
235 }
236 break;
237 }
238
239 /* page/2 sized */
240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
241 {
242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
243 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
244 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
245 {
246 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
247 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
248 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
249 {
250 X86PTE GstPte;
251 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
252 AssertRC(rc);
253
254 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
255 pgmPoolTracDerefGCPhysHint(pPool, pPage,
256 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
257 GstPte.u & X86_PTE_PG_MASK,
258 iShw);
259 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
260 }
261 }
262 break;
263 }
264
265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
269 {
270 unsigned iGst = off / sizeof(X86PDE);
271 unsigned iShwPdpt = iGst / 256;
272 unsigned iShw = (iGst % 256) * 2;
273 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
274
275 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
277 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
278 {
279 for (unsigned i = 0; i < 2; i++)
280 {
281 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
282 if (uPde & X86_PDE_P)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
285 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
286 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
287 }
288
289 /* paranoia / a bit assumptive. */
290 if ( (off & 3)
291 && (off & 3) + cbWrite > 4)
292 {
293 const unsigned iShw2 = iShw + 2 + i;
294 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
295 {
296 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
297 if (uPde2 & X86_PDE_P)
298 {
299 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
300 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
301 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
302 }
303 }
304 }
305 }
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
311 {
312 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
313 const unsigned iShw = off / sizeof(X86PTEPAE);
314 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
315 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
316 {
317 X86PTEPAE GstPte;
318 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
319 AssertRC(rc);
320
321 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
324 GstPte.u & X86_PTE_PAE_PG_MASK,
325 iShw);
326 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 7)
331 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
332 {
333 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
334 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
335
336 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
337 {
338 X86PTEPAE GstPte;
339 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
340 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
341 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
342 AssertRC(rc);
343 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
344 pgmPoolTracDerefGCPhysHint(pPool, pPage,
345 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
346 GstPte.u & X86_PTE_PAE_PG_MASK,
347 iShw2);
348 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
349 }
350 }
351 break;
352 }
353
354 case PGMPOOLKIND_32BIT_PD:
355 {
356 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
357 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
358
359 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
360 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
361 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
362 if (uPde & X86_PDE_P)
363 {
364 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
365 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
366 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
367 }
368
369 /* paranoia / a bit assumptive. */
370 if ( (off & 3)
371 && (off & 3) + cbWrite > sizeof(X86PTE))
372 {
373 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
374 if ( iShw2 != iShw
375 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
376 {
377 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
378 if (uPde2 & X86_PDE_P)
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
381 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
382 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
383 }
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
387 if ( uShw.pPD->a[iShw].n.u1Present
388 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
391 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394#endif
395 break;
396 }
397
398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
399 {
400 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
401 const unsigned iShw = off / sizeof(X86PDEPAE);
402 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
403
404 /*
405 * Causes trouble when the guest uses a PDE to refer to the whole page table level
406 * structure. (Invalidate here; faults later on when it tries to change the page
407 * table entries -> recheck; probably only applies to the RC case.)
408 */
409 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
410 if (uPde & X86_PDE_P)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
413 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
414 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
415 }
416
417 /* paranoia / a bit assumptive. */
418 if ( (off & 7)
419 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
420 {
421 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
422 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
423
424 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
425 if (uPde2 & X86_PDE_P)
426 {
427 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
428 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
429 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
430 }
431 }
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PDPT:
436 {
437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
438 /*
439 * Hopefully this doesn't happen very often:
440 * - touching unused parts of the page
441 * - messing with the bits of pd pointers without changing the physical address
442 */
443 /* PDPT roots are not page aligned; 32 byte only! */
444 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
445
446 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
447 const unsigned iShw = offPdpt / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
451 if (uPdpe & X86_PDPE_P)
452 {
453 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
454 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
455 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( (offPdpt & 7)
460 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
461 {
462 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
463 if ( iShw2 != iShw
464 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
465 {
466 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
467 if (uPdpe2 & X86_PDPE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
470 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
485 if (uPde & X86_PDE_P)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
488 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
489 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
490 }
491
492 /* paranoia / a bit assumptive. */
493 if ( (off & 7)
494 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
495 {
496 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
497 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
498 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
499 if (uPde2 & X86_PDE_P)
500 {
501 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
502 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - messing with the bits of pd pointers without changing the physical address
515 */
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PDPE);
518 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
519 if (uPdpe & X86_PDPE_P)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
522 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
523 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
524 }
525 /* paranoia / a bit assumptive. */
526 if ( (off & 7)
527 && (off & 7) + cbWrite > sizeof(X86PDPE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
530 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
531 if (uPdpe2 & X86_PDPE_P)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
534 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
536 }
537 }
538 break;
539 }
540
541 case PGMPOOLKIND_64BIT_PML4:
542 {
543 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
544 /*
545 * Hopefully this doesn't happen very often:
546 * - messing with the bits of pd pointers without changing the physical address
547 */
548 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
549 const unsigned iShw = off / sizeof(X86PDPE);
550 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
551 if (uPml4e & X86_PML4E_P)
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
554 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
555 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
556 }
557 /* paranoia / a bit assumptive. */
558 if ( (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
562 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
563 if (uPml4e2 & X86_PML4E_P)
564 {
565 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
566 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
567 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
568 }
569 }
570 break;
571 }
572
573#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
574 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
575 {
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(EPTPML4E);
578 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
579 if (uPml4e & EPT_PRESENT_MASK)
580 {
581 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
582 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
583 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
584 }
585
586 /* paranoia / a bit assumptive. */
587 if ( (off & 7)
588 && (off & 7) + cbWrite > sizeof(X86PML4E))
589 {
590 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
591 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
592 if (uPml4e2 & EPT_PRESENT_MASK)
593 {
594 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
595 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
596 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
597 }
598 }
599 break;
600 }
601
602 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
603 {
604 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
605 const unsigned iShw = off / sizeof(EPTPDPTE);
606 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
607 if (uPdpte & EPT_PRESENT_MASK)
608 {
609 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
610 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
611 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
612 }
613
614 /* paranoia / a bit assumptive. */
615 if ( (off & 7)
616 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
617 {
618 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
619 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
620 if (uPdpte2 & EPT_PRESENT_MASK)
621 {
622 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
623 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
624 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
625 }
626 }
627 break;
628 }
629
630 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
633 const unsigned iShw = off / sizeof(EPTPDE);
634 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
635 if (uPde & EPT_PRESENT_MASK)
636 {
637 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
638 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
639 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
640 }
641
642 /* paranoia / a bit assumptive. */
643 if ( (off & 7)
644 && (off & 7) + cbWrite > sizeof(EPTPDE))
645 {
646 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
647 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
648 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
649 if (uPde2 & EPT_PRESENT_MASK)
650 {
651 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
652 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
653 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
654 }
655 }
656 break;
657 }
658
659 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
660 {
661 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
662 const unsigned iShw = off / sizeof(EPTPTE);
663 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
664 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
665 if (uPte & EPT_PRESENT_MASK)
666 {
667 EPTPTE GstPte;
668 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
669 AssertRC(rc);
670
671 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
672 pgmPoolTracDerefGCPhysHint(pPool, pPage,
673 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
674 GstPte.u & EPT_PTE_PG_MASK,
675 iShw);
676 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
677 }
678
679 /* paranoia / a bit assumptive. */
680 if ( (off & 7)
681 && (off & 7) + cbWrite > sizeof(EPTPTE))
682 {
683 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
684 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
685 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
686 if (uPte2 & EPT_PRESENT_MASK)
687 {
688 EPTPTE GstPte;
689 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
690 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
691 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
692 AssertRC(rc);
693 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
694 pgmPoolTracDerefGCPhysHint(pPool, pPage,
695 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
696 GstPte.u & EPT_PTE_PG_MASK,
697 iShw2);
698 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
699 }
700 }
701 break;
702 }
703#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
704
705 default:
706 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
707 }
708 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
709
710 /* next */
711 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
712 return;
713 pPage = &pPool->aPages[pPage->iMonitoredNext];
714 }
715}
716
717#ifndef IN_RING3
718
719/**
720 * Checks if a access could be a fork operation in progress.
721 *
722 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
723 *
724 * @returns true if it's likely that we're forking, otherwise false.
725 * @param pPool The pool.
726 * @param pDis The disassembled instruction.
727 * @param offFault The access offset.
728 */
729DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
730{
731 /*
732 * i386 linux is using btr to clear X86_PTE_RW.
733 * The functions involved are (2.6.16 source inspection):
734 * clear_bit
735 * ptep_set_wrprotect
736 * copy_one_pte
737 * copy_pte_range
738 * copy_pmd_range
739 * copy_pud_range
740 * copy_page_range
741 * dup_mmap
742 * dup_mm
743 * copy_mm
744 * copy_process
745 * do_fork
746 */
747 if ( pDis->pCurInstr->uOpcode == OP_BTR
748 && !(offFault & 4)
749 /** @todo Validate that the bit index is X86_PTE_RW. */
750 )
751 {
752 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
753 return true;
754 }
755 return false;
756}
757
758
759/**
760 * Determine whether the page is likely to have been reused.
761 *
762 * @returns true if we consider the page as being reused for a different purpose.
763 * @returns false if we consider it to still be a paging page.
764 * @param pVM The cross context VM structure.
765 * @param pVCpu The cross context virtual CPU structure.
766 * @param pRegFrame Trap register frame.
767 * @param pDis The disassembly info for the faulting instruction.
768 * @param pvFault The fault address.
769 * @param pPage The pool page being accessed.
770 *
771 * @remark The REP prefix check is left to the caller because of STOSD/W.
772 */
773DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
774 PPGMPOOLPAGE pPage)
775{
776 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
777 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
778 if (pPage->cLocked)
779 {
780 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
781 return false;
782 }
783
784 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
785 if ( HMHasPendingIrq(pVM)
786 && pRegFrame->rsp - pvFault < 32)
787 {
788 /* Fault caused by stack writes while trying to inject an interrupt event. */
789 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
790 return true;
791 }
792
793 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
794
795 /* Non-supervisor mode write means it's used for something else. */
796 if (CPUMGetGuestCPL(pVCpu) == 3)
797 return true;
798
799 switch (pDis->pCurInstr->uOpcode)
800 {
801 /* call implies the actual push of the return address faulted */
802 case OP_CALL:
803 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
804 return true;
805 case OP_PUSH:
806 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
807 return true;
808 case OP_PUSHF:
809 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
810 return true;
811 case OP_PUSHA:
812 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
813 return true;
814 case OP_FXSAVE:
815 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
816 return true;
817 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
818 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
819 return true;
820 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
821 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
822 return true;
823 case OP_MOVSWD:
824 case OP_STOSWD:
825 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
826 && pRegFrame->rcx >= 0x40
827 )
828 {
829 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
830
831 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
832 return true;
833 }
834 break;
835
836 default:
837 /*
838 * Anything having ESP on the left side means stack writes.
839 */
840 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
841 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
842 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
843 {
844 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
845 return true;
846 }
847 break;
848 }
849
850 /*
851 * Page table updates are very very unlikely to be crossing page boundraries,
852 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
853 */
854 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
855 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
856 {
857 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
858 return true;
859 }
860
861 /*
862 * Nobody does an unaligned 8 byte write to a page table, right.
863 */
864 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
865 {
866 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
867 return true;
868 }
869
870 return false;
871}
872
873
874/**
875 * Flushes the page being accessed.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM The cross context VM structure.
879 * @param pVCpu The cross context virtual CPU structure.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pRegFrame The trap register frame.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @param pvFault The fault address.
886 * @todo VBOXSTRICTRC
887 */
888static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
889 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
890{
891 NOREF(pVM); NOREF(GCPhysFault);
892
893 /*
894 * First, do the flushing.
895 */
896 pgmPoolMonitorChainFlush(pPool, pPage);
897
898 /*
899 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
900 * Must do this in raw mode (!); XP boot will fail otherwise.
901 */
902 int rc = VINF_SUCCESS;
903 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
904 if (rc2 == VINF_SUCCESS)
905 { /* do nothing */ }
906 else if (rc2 == VINF_EM_RESCHEDULE)
907 {
908 rc = VBOXSTRICTRC_VAL(rc2);
909# ifndef IN_RING3
910 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
911# endif
912 }
913 else if (rc2 == VERR_EM_INTERPRETER)
914 {
915 rc = VINF_EM_RAW_EMULATE_INSTR;
916 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
917 }
918 else if (RT_FAILURE_NP(rc2))
919 rc = VBOXSTRICTRC_VAL(rc2);
920 else
921 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
922
923 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
924 return rc;
925}
926
927
928/**
929 * Handles the STOSD write accesses.
930 *
931 * @returns VBox status code suitable for scheduling.
932 * @param pVM The cross context VM structure.
933 * @param pPool The pool.
934 * @param pPage The pool page (head).
935 * @param pDis The disassembly of the write instruction.
936 * @param pRegFrame The trap register frame.
937 * @param GCPhysFault The fault address as guest physical address.
938 * @param pvFault The fault address.
939 */
940DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
941 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
942{
943 unsigned uIncrement = pDis->Param1.cb;
944 NOREF(pVM);
945
946 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
947 Assert(pRegFrame->rcx <= 0x20);
948
949# ifdef VBOX_STRICT
950 if (pDis->uOpMode == DISCPUMODE_32BIT)
951 Assert(uIncrement == 4);
952 else
953 Assert(uIncrement == 8);
954# endif
955
956 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
957
958 /*
959 * Increment the modification counter and insert it into the list
960 * of modified pages the first time.
961 */
962 if (!pPage->cModifications++)
963 pgmPoolMonitorModifiedInsert(pPool, pPage);
964
965 /*
966 * Execute REP STOSD.
967 *
968 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
969 * write situation, meaning that it's safe to write here.
970 */
971 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
972 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
973 while (pRegFrame->rcx)
974 {
975 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
976 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
977 pu32 += uIncrement;
978 GCPhysFault += uIncrement;
979 pRegFrame->rdi += uIncrement;
980 pRegFrame->rcx--;
981 }
982 pRegFrame->rip += pDis->cbInstr;
983
984 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
985 return VINF_SUCCESS;
986}
987
988
989/**
990 * Handles the simple write accesses.
991 *
992 * @returns VBox status code suitable for scheduling.
993 * @param pVM The cross context VM structure.
994 * @param pVCpu The cross context virtual CPU structure.
995 * @param pPool The pool.
996 * @param pPage The pool page (head).
997 * @param pDis The disassembly of the write instruction.
998 * @param pRegFrame The trap register frame.
999 * @param GCPhysFault The fault address as guest physical address.
1000 * @param pvFault The fault address.
1001 * @param pfReused Reused state (in/out)
1002 */
1003DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1004 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1005{
1006 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1007 NOREF(pVM);
1008 NOREF(pfReused); /* initialized by caller */
1009
1010 /*
1011 * Increment the modification counter and insert it into the list
1012 * of modified pages the first time.
1013 */
1014 if (!pPage->cModifications++)
1015 pgmPoolMonitorModifiedInsert(pPool, pPage);
1016
1017 /*
1018 * Clear all the pages. ASSUMES that pvFault is readable.
1019 */
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
1034
1035 /*
1036 * Interpret the instruction.
1037 */
1038 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
1039 if (RT_SUCCESS(rc))
1040 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1041 else if (rc == VERR_EM_INTERPRETER)
1042 {
1043 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1044 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
1045 rc = VINF_EM_RAW_EMULATE_INSTR;
1046 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1047 }
1048
1049# if 0 /* experimental code */
1050 if (rc == VINF_SUCCESS)
1051 {
1052 switch (pPage->enmKind)
1053 {
1054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1055 {
1056 X86PTEPAE GstPte;
1057 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1058 AssertRC(rc);
1059
1060 /* Check the new value written by the guest. If present and with a bogus physical address, then
1061 * it's fairly safe to assume the guest is reusing the PT.
1062 */
1063 if (GstPte.n.u1Present)
1064 {
1065 RTHCPHYS HCPhys = -1;
1066 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1067 if (rc != VINF_SUCCESS)
1068 {
1069 *pfReused = true;
1070 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1071 }
1072 }
1073 break;
1074 }
1075 }
1076 }
1077# endif
1078
1079 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1080 return VBOXSTRICTRC_VAL(rc);
1081}
1082
1083
1084/**
1085 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1086 * \#PF access handler callback for page table pages.}
1087 *
1088 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1089 */
1090DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
1091 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1092{
1093 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1094 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1095 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1096 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1097 unsigned cMaxModifications;
1098 bool fForcedFlush = false;
1099 RT_NOREF_PV(uErrorCode);
1100
1101# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1102 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1103 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1104# endif
1105 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1106
1107 PGM_LOCK_VOID(pVM);
1108 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1109 {
1110 /* Pool page changed while we were waiting for the lock; ignore. */
1111 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1112 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1113 PGM_UNLOCK(pVM);
1114 return VINF_SUCCESS;
1115 }
1116# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1117 if (pPage->fDirty)
1118 {
1119# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1120 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1121# endif
1122 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1123 PGM_UNLOCK(pVM);
1124 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1125 }
1126# endif
1127
1128# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1129 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1130 {
1131 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1132 void *pvGst;
1133 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1134 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1136 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1137 }
1138# endif
1139
1140# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1141 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1142 {
1143 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1144 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1145 pgmPoolMonitorChainFlush(pPool, pPage);
1146 PGM_UNLOCK(pVM);
1147 return VINF_SUCCESS;
1148 }
1149# endif
1150
1151 /*
1152 * Disassemble the faulting instruction.
1153 */
1154 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1155 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
1156 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1157 {
1158 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1159 PGM_UNLOCK(pVM);
1160 return rc;
1161 }
1162
1163 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1164
1165 /*
1166 * We should ALWAYS have the list head as user parameter. This
1167 * is because we use that page to record the changes.
1168 */
1169 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1170
1171# ifdef IN_RING0
1172 /* Maximum nr of modifications depends on the page type. */
1173 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1174 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1175 cMaxModifications = 4;
1176 else
1177 cMaxModifications = 24;
1178# else
1179 cMaxModifications = 48;
1180# endif
1181
1182 /*
1183 * Incremental page table updates should weigh more than random ones.
1184 * (Only applies when started from offset 0)
1185 */
1186 pVCpu->pgm.s.cPoolAccessHandler++;
1187 if ( pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1188 && pPage->GCPtrLastAccessHandlerRip < pRegFrame->rip + 0x40
1189 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1190 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1191 {
1192 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1193 Assert(pPage->cModifications < 32000);
1194 pPage->cModifications = pPage->cModifications * 2;
1195 pPage->GCPtrLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1197 if (pPage->cModifications >= cMaxModifications)
1198 {
1199 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1200 fForcedFlush = true;
1201 }
1202 }
1203
1204 if (pPage->cModifications >= cMaxModifications)
1205 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1206
1207 /*
1208 * Check if it's worth dealing with.
1209 */
1210 bool fReused = false;
1211 bool fNotReusedNotForking = false;
1212 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1213 || pgmPoolIsPageLocked(pPage)
1214 )
1215 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
1216 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1217 {
1218 /*
1219 * Simple instructions, no REP prefix.
1220 */
1221 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1222 {
1223 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1224 if (fReused)
1225 goto flushPage;
1226
1227 /* A mov instruction to change the first page table entry will be remembered so we can detect
1228 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1229 */
1230 if ( rc == VINF_SUCCESS
1231 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1232 && pDis->pCurInstr->uOpcode == OP_MOV
1233 && (pvFault & PAGE_OFFSET_MASK) == 0)
1234 {
1235 pPage->GCPtrLastAccessHandlerFault = pvFault;
1236 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1237 pPage->GCPtrLastAccessHandlerRip = pRegFrame->rip;
1238 /* Make sure we don't kick out a page too quickly. */
1239 if (pPage->cModifications > 8)
1240 pPage->cModifications = 2;
1241 }
1242 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1243 {
1244 /* ignore the 2nd write to this page table entry. */
1245 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1246 }
1247 else
1248 {
1249 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1250 pPage->GCPtrLastAccessHandlerRip = 0;
1251 }
1252
1253 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1254 PGM_UNLOCK(pVM);
1255 return rc;
1256 }
1257
1258 /*
1259 * Windows is frequently doing small memset() operations (netio test 4k+).
1260 * We have to deal with these or we'll kill the cache and performance.
1261 */
1262 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1263 && !pRegFrame->eflags.Bits.u1DF
1264 && pDis->uOpMode == pDis->uCpuMode
1265 && pDis->uAddrMode == pDis->uCpuMode)
1266 {
1267 bool fValidStosd = false;
1268
1269 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1270 && pDis->fPrefix == DISPREFIX_REP
1271 && pRegFrame->ecx <= 0x20
1272 && pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1273 && !((uintptr_t)pvFault & 3)
1274 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1275 )
1276 {
1277 fValidStosd = true;
1278 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1279 }
1280 else
1281 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1282 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1283 && pRegFrame->rcx <= 0x20
1284 && pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1285 && !((uintptr_t)pvFault & 7)
1286 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1287 )
1288 {
1289 fValidStosd = true;
1290 }
1291
1292 if (fValidStosd)
1293 {
1294 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1295 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1296 PGM_UNLOCK(pVM);
1297 return rc;
1298 }
1299 }
1300
1301 /* REP prefix, don't bother. */
1302 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1303 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1304 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1305 fNotReusedNotForking = true;
1306 }
1307
1308# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1309 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1310 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1311 */
1312 if ( pPage->cModifications >= cMaxModifications
1313 && !fForcedFlush
1314 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1315 && ( fNotReusedNotForking
1316 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
1317 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1318 )
1319 )
1320 {
1321 Assert(!pgmPoolIsPageLocked(pPage));
1322 Assert(pPage->fDirty == false);
1323
1324 /* Flush any monitored duplicates as we will disable write protection. */
1325 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1326 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1327 {
1328 PPGMPOOLPAGE pPageHead = pPage;
1329
1330 /* Find the monitor head. */
1331 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1332 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1333
1334 while (pPageHead)
1335 {
1336 unsigned idxNext = pPageHead->iMonitoredNext;
1337
1338 if (pPageHead != pPage)
1339 {
1340 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1341 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1342 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1343 AssertRC(rc2);
1344 }
1345
1346 if (idxNext == NIL_PGMPOOL_IDX)
1347 break;
1348
1349 pPageHead = &pPool->aPages[idxNext];
1350 }
1351 }
1352
1353 /* The flushing above might fail for locked pages, so double check. */
1354 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1355 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1356 {
1357 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1358
1359 /* Temporarily allow write access to the page table again. */
1360 rc = PGMHandlerPhysicalPageTempOff(pVM,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1362 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1363 if (rc == VINF_SUCCESS)
1364 {
1365 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1366 AssertMsg(rc == VINF_SUCCESS
1367 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1368 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1369 || rc == VERR_PAGE_NOT_PRESENT,
1370 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1371# ifdef VBOX_STRICT
1372 pPage->GCPtrDirtyFault = pvFault;
1373# endif
1374
1375 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1376 PGM_UNLOCK(pVM);
1377 return rc;
1378 }
1379 }
1380 }
1381# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1382
1383 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1384flushPage:
1385 /*
1386 * Not worth it, so flush it.
1387 *
1388 * If we considered it to be reused, don't go back to ring-3
1389 * to emulate failed instructions since we usually cannot
1390 * interpret then. This may be a bit risky, in which case
1391 * the reuse detection must be fixed.
1392 */
1393 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1394 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1395 && fReused)
1396 {
1397 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1398 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1399 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1400 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1401 }
1402 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1403 PGM_UNLOCK(pVM);
1404 return rc;
1405}
1406
1407#endif /* !IN_RING3 */
1408
1409/**
1410 * @callback_method_impl{FNPGMPHYSHANDLER,
1411 * Access handler for shadowed page table pages.}
1412 *
1413 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1414 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1415 */
1416DECLCALLBACK(VBOXSTRICTRC)
1417pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1418 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1419{
1420 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1421 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1422 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1423 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1424 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1425 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1426
1427 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1428
1429 PGM_LOCK_VOID(pVM);
1430
1431#ifdef VBOX_WITH_STATISTICS
1432 /*
1433 * Collect stats on the access.
1434 */
1435 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1436 if (cbBuf <= 16 && cbBuf > 0)
1437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1438 else if (cbBuf >= 17 && cbBuf < 32)
1439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1440 else if (cbBuf >= 32 && cbBuf < 64)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1442 else if (cbBuf >= 64)
1443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1444
1445 uint8_t cbAlign;
1446 switch (pPage->enmKind)
1447 {
1448 default:
1449 cbAlign = 7;
1450 break;
1451 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1454 case PGMPOOLKIND_32BIT_PD:
1455 case PGMPOOLKIND_32BIT_PD_PHYS:
1456 cbAlign = 3;
1457 break;
1458 }
1459 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1460 if ((uint8_t)GCPhys & cbAlign)
1461 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1462#endif
1463
1464 /*
1465 * Make sure the pool page wasn't modified by a different CPU.
1466 */
1467 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1468 {
1469 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1470
1471 /* The max modification count before flushing depends on the context and page type. */
1472#ifdef IN_RING3
1473 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1474#else
1475 uint16_t cMaxModifications;
1476 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1477 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1478 cMaxModifications = 4;
1479 else
1480 cMaxModifications = 24;
1481#endif
1482
1483 /*
1484 * We don't have to be very sophisticated about this since there are relativly few calls here.
1485 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1486 */
1487 if ( ( pPage->cModifications < cMaxModifications
1488 || pgmPoolIsPageLocked(pPage) )
1489 && enmOrigin != PGMACCESSORIGIN_DEVICE
1490 && cbBuf <= 16)
1491 {
1492 /* Clear the shadow entry. */
1493 if (!pPage->cModifications++)
1494 pgmPoolMonitorModifiedInsert(pPool, pPage);
1495
1496 if (cbBuf <= 8)
1497 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1498 else
1499 {
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1501 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1502 }
1503 }
1504 else
1505 pgmPoolMonitorChainFlush(pPool, pPage);
1506
1507 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1508 }
1509 else
1510 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1511 PGM_UNLOCK(pVM);
1512 return VINF_PGM_HANDLER_DO_DEFAULT;
1513}
1514
1515
1516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1517
1518# if defined(VBOX_STRICT) && !defined(IN_RING3)
1519
1520/**
1521 * Check references to guest physical memory in a PAE / PAE page table.
1522 *
1523 * @param pPool The pool.
1524 * @param pPage The page.
1525 * @param pShwPT The shadow page table (mapping of the page).
1526 * @param pGstPT The guest page table.
1527 */
1528static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1529{
1530 unsigned cErrors = 0;
1531 int LastRc = -1; /* initialized to shut up gcc */
1532 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1533 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1534 PVMCC pVM = pPool->CTX_SUFF(pVM);
1535
1536# ifdef VBOX_STRICT
1537 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1538 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1539# endif
1540 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1541 {
1542 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1543 {
1544 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1545 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1546 if ( rc != VINF_SUCCESS
1547 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1548 {
1549 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1550 LastPTE = i;
1551 LastRc = rc;
1552 LastHCPhys = HCPhys;
1553 cErrors++;
1554
1555 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1556 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1557 AssertRC(rc);
1558
1559 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1560 {
1561 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1562
1563 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1564 {
1565 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1566
1567 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1568 {
1569 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1570 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1571 {
1572 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1573 }
1574 }
1575
1576 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1577 }
1578 }
1579 }
1580 }
1581 }
1582 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1583}
1584
1585
1586/**
1587 * Check references to guest physical memory in a PAE / 32-bit page table.
1588 *
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 * @param pShwPT The shadow page table (mapping of the page).
1592 * @param pGstPT The guest page table.
1593 */
1594static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1595{
1596 unsigned cErrors = 0;
1597 int LastRc = -1; /* initialized to shut up gcc */
1598 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1599 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1600 PVMCC pVM = pPool->CTX_SUFF(pVM);
1601
1602# ifdef VBOX_STRICT
1603 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1604 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1605# endif
1606 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1607 {
1608 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1609 {
1610 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1611 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1612 if ( rc != VINF_SUCCESS
1613 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1614 {
1615 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1616 LastPTE = i;
1617 LastRc = rc;
1618 LastHCPhys = HCPhys;
1619 cErrors++;
1620
1621 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1622 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1623 AssertRC(rc);
1624
1625 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1626 {
1627 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1628
1629 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1630 {
1631 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1632
1633 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1634 {
1635 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1636 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1637 {
1638 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1639 }
1640 }
1641
1642 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1643 }
1644 }
1645 }
1646 }
1647 }
1648 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1649}
1650
1651# endif /* VBOX_STRICT && !IN_RING3 */
1652
1653/**
1654 * Clear references to guest physical memory in a PAE / PAE page table.
1655 *
1656 * @returns nr of changed PTEs
1657 * @param pPool The pool.
1658 * @param pPage The page.
1659 * @param pShwPT The shadow page table (mapping of the page).
1660 * @param pGstPT The guest page table.
1661 * @param pOldGstPT The old cached guest page table.
1662 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1663 * @param pfFlush Flush reused page table (out)
1664 */
1665DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1666 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1667{
1668 unsigned cChanged = 0;
1669
1670# ifdef VBOX_STRICT
1671 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1672 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1673# endif
1674 *pfFlush = false;
1675
1676 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1677 {
1678 /* Check the new value written by the guest. If present and with a bogus physical address, then
1679 * it's fairly safe to assume the guest is reusing the PT.
1680 */
1681 if ( fAllowRemoval
1682 && (pGstPT->a[i].u & X86_PTE_P))
1683 {
1684 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1685 {
1686 *pfFlush = true;
1687 return ++cChanged;
1688 }
1689 }
1690 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1691 {
1692 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1693 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1694 {
1695# ifdef VBOX_STRICT
1696 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1697 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1698 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1699# endif
1700 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1701 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1702 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1703 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1704
1705 if ( uHostAttr == uGuestAttr
1706 && fHostRW <= fGuestRW)
1707 continue;
1708 }
1709 cChanged++;
1710 /* Something was changed, so flush it. */
1711 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1712 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1713 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1714 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1715 }
1716 }
1717 return cChanged;
1718}
1719
1720
1721/**
1722 * Clear references to guest physical memory in a PAE / 32-bit page table.
1723 *
1724 * @returns nr of changed PTEs
1725 * @param pPool The pool.
1726 * @param pPage The page.
1727 * @param pShwPT The shadow page table (mapping of the page).
1728 * @param pGstPT The guest page table.
1729 * @param pOldGstPT The old cached guest page table.
1730 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1731 * @param pfFlush Flush reused page table (out)
1732 */
1733DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1734 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1735{
1736 unsigned cChanged = 0;
1737
1738# ifdef VBOX_STRICT
1739 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1740 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1741# endif
1742 *pfFlush = false;
1743
1744 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1745 {
1746 /* Check the new value written by the guest. If present and with a bogus physical address, then
1747 * it's fairly safe to assume the guest is reusing the PT. */
1748 if (fAllowRemoval)
1749 {
1750 X86PGUINT const uPte = pGstPT->a[i].u;
1751 if ( (uPte & X86_PTE_P)
1752 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1753 {
1754 *pfFlush = true;
1755 return ++cChanged;
1756 }
1757 }
1758 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1759 {
1760 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1761 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1762 {
1763# ifdef VBOX_STRICT
1764 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1765 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1766 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1767# endif
1768 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1769 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1770 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1771 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1772
1773 if ( uHostAttr == uGuestAttr
1774 && fHostRW <= fGuestRW)
1775 continue;
1776 }
1777 cChanged++;
1778 /* Something was changed, so flush it. */
1779 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1780 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1781 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1782 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1783 }
1784 }
1785 return cChanged;
1786}
1787
1788
1789/**
1790 * Flush a dirty page
1791 *
1792 * @param pVM The cross context VM structure.
1793 * @param pPool The pool.
1794 * @param idxSlot Dirty array slot index
1795 * @param fAllowRemoval Allow a reused page table to be removed
1796 */
1797static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1798{
1799 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1800
1801 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1802 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1803 if (idxPage == NIL_PGMPOOL_IDX)
1804 return;
1805
1806 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1807 Assert(pPage->idx == idxPage);
1808 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1809
1810 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1811 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1812
1813 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1814 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1815 Assert(rc == VINF_SUCCESS);
1816 pPage->fDirty = false;
1817
1818# ifdef VBOX_STRICT
1819 uint64_t fFlags = 0;
1820 RTHCPHYS HCPhys;
1821 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1822 AssertMsg( ( rc == VINF_SUCCESS
1823 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1824 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1825 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1826 || rc == VERR_PAGE_NOT_PRESENT,
1827 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1828# endif
1829
1830 /* Flush those PTEs that have changed. */
1831 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1832 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1833 void *pvGst;
1834 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1835 bool fFlush;
1836 unsigned cChanges;
1837
1838 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1839 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1840 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1841 else
1842 {
1843 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1844 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1845 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1846 }
1847
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1850 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1851 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1852
1853 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1854 Assert(pPage->cModifications);
1855 if (cChanges < 4)
1856 pPage->cModifications = 1; /* must use > 0 here */
1857 else
1858 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1859
1860 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1861 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1862 pPool->idxFreeDirtyPage = idxSlot;
1863
1864 pPool->cDirtyPages--;
1865 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867 if (fFlush)
1868 {
1869 Assert(fAllowRemoval);
1870 Log(("Flush reused page table!\n"));
1871 pgmPoolFlushPage(pPool, pPage);
1872 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1873 }
1874 else
1875 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1876}
1877
1878
1879# ifndef IN_RING3
1880/**
1881 * Add a new dirty page
1882 *
1883 * @param pVM The cross context VM structure.
1884 * @param pPool The pool.
1885 * @param pPage The page.
1886 */
1887void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1888{
1889 PGM_LOCK_ASSERT_OWNER(pVM);
1890 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1891 Assert(!pPage->fDirty);
1892 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1893
1894 unsigned idxFree = pPool->idxFreeDirtyPage;
1895 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1896 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1897
1898 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1899 {
1900 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1901 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1902 }
1903 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1904 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1905
1906 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1907
1908 /*
1909 * Make a copy of the guest page table as we require valid GCPhys addresses
1910 * when removing references to physical pages.
1911 * (The HCPhys linear lookup is *extremely* expensive!)
1912 */
1913 void *pvGst;
1914 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1915 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1916 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1917# ifdef VBOX_STRICT
1918 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1919 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1920 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1921 else
1922 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1924# endif
1925 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1926
1927 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1928 pPage->fDirty = true;
1929 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1930 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1931 pPool->cDirtyPages++;
1932
1933 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1934 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1935 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1936 {
1937 unsigned i;
1938 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1939 {
1940 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1941 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1942 {
1943 pPool->idxFreeDirtyPage = idxFree;
1944 break;
1945 }
1946 }
1947 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1948 }
1949
1950 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1951
1952 /*
1953 * Clear all references to this shadow table. See @bugref{7298}.
1954 */
1955 pgmPoolTrackClearPageUsers(pPool, pPage);
1956}
1957# endif /* !IN_RING3 */
1958
1959
1960/**
1961 * Check if the specified page is dirty (not write monitored)
1962 *
1963 * @return dirty or not
1964 * @param pVM The cross context VM structure.
1965 * @param GCPhys Guest physical address
1966 */
1967bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1968{
1969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1970 PGM_LOCK_ASSERT_OWNER(pVM);
1971 if (!pPool->cDirtyPages)
1972 return false;
1973
1974 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1975
1976 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1977 {
1978 unsigned idxPage = pPool->aidxDirtyPages[i];
1979 if (idxPage != NIL_PGMPOOL_IDX)
1980 {
1981 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1982 if (pPage->GCPhys == GCPhys)
1983 return true;
1984 }
1985 }
1986 return false;
1987}
1988
1989
1990/**
1991 * Reset all dirty pages by reinstating page monitoring.
1992 *
1993 * @param pVM The cross context VM structure.
1994 */
1995void pgmPoolResetDirtyPages(PVMCC pVM)
1996{
1997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1998 PGM_LOCK_ASSERT_OWNER(pVM);
1999 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2000
2001 if (!pPool->cDirtyPages)
2002 return;
2003
2004 Log(("pgmPoolResetDirtyPages\n"));
2005 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2006 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2007
2008 pPool->idxFreeDirtyPage = 0;
2009 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2010 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2011 {
2012 unsigned i;
2013 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2014 {
2015 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2016 {
2017 pPool->idxFreeDirtyPage = i;
2018 break;
2019 }
2020 }
2021 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2022 }
2023
2024 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2025 return;
2026}
2027
2028
2029/**
2030 * Invalidate the PT entry for the specified page
2031 *
2032 * @param pVM The cross context VM structure.
2033 * @param GCPtrPage Guest page to invalidate
2034 */
2035void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2036{
2037 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2038 PGM_LOCK_ASSERT_OWNER(pVM);
2039 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2040
2041 if (!pPool->cDirtyPages)
2042 return;
2043
2044 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2045 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2046 {
2047 /** @todo What was intended here??? This looks incomplete... */
2048 }
2049}
2050
2051
2052/**
2053 * Reset all dirty pages by reinstating page monitoring.
2054 *
2055 * @param pVM The cross context VM structure.
2056 * @param GCPhysPT Physical address of the page table
2057 */
2058void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2059{
2060 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2061 PGM_LOCK_ASSERT_OWNER(pVM);
2062 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2063 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2064
2065 if (!pPool->cDirtyPages)
2066 return;
2067
2068 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2069
2070 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2071 {
2072 unsigned idxPage = pPool->aidxDirtyPages[i];
2073 if (idxPage != NIL_PGMPOOL_IDX)
2074 {
2075 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2076 if (pPage->GCPhys == GCPhysPT)
2077 {
2078 idxDirtyPage = i;
2079 break;
2080 }
2081 }
2082 }
2083
2084 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2085 {
2086 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2087 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2088 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2089 {
2090 unsigned i;
2091 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2092 {
2093 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2094 {
2095 pPool->idxFreeDirtyPage = i;
2096 break;
2097 }
2098 }
2099 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2100 }
2101 }
2102}
2103
2104#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2105
2106/**
2107 * Inserts a page into the GCPhys hash table.
2108 *
2109 * @param pPool The pool.
2110 * @param pPage The page.
2111 */
2112DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2113{
2114 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2115 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2116 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2117 pPage->iNext = pPool->aiHash[iHash];
2118 pPool->aiHash[iHash] = pPage->idx;
2119}
2120
2121
2122/**
2123 * Removes a page from the GCPhys hash table.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The page.
2127 */
2128DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2131 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2132 if (pPool->aiHash[iHash] == pPage->idx)
2133 pPool->aiHash[iHash] = pPage->iNext;
2134 else
2135 {
2136 uint16_t iPrev = pPool->aiHash[iHash];
2137 for (;;)
2138 {
2139 const int16_t i = pPool->aPages[iPrev].iNext;
2140 if (i == pPage->idx)
2141 {
2142 pPool->aPages[iPrev].iNext = pPage->iNext;
2143 break;
2144 }
2145 if (i == NIL_PGMPOOL_IDX)
2146 {
2147 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2148 break;
2149 }
2150 iPrev = i;
2151 }
2152 }
2153 pPage->iNext = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Frees up one cache page.
2159 *
2160 * @returns VBox status code.
2161 * @retval VINF_SUCCESS on success.
2162 * @param pPool The pool.
2163 * @param iUser The user index.
2164 */
2165static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2166{
2167 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2168 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2169 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2170
2171 /*
2172 * Select one page from the tail of the age list.
2173 */
2174 PPGMPOOLPAGE pPage;
2175 for (unsigned iLoop = 0; ; iLoop++)
2176 {
2177 uint16_t iToFree = pPool->iAgeTail;
2178 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2179 iToFree = pPool->aPages[iToFree].iAgePrev;
2180/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2181 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2182 {
2183 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2184 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2185 {
2186 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2187 continue;
2188 iToFree = i;
2189 break;
2190 }
2191 }
2192*/
2193 Assert(iToFree != iUser);
2194 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2195 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2196 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2197 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2198 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2199 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2200
2201 pPage = &pPool->aPages[iToFree];
2202
2203 /*
2204 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2205 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2206 */
2207 if ( !pgmPoolIsPageLocked(pPage)
2208 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2209 break;
2210 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2211 pgmPoolCacheUsed(pPool, pPage);
2212 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2213 }
2214
2215 /*
2216 * Found a usable page, flush it and return.
2217 */
2218 int rc = pgmPoolFlushPage(pPool, pPage);
2219 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2220 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2221 if (rc == VINF_SUCCESS)
2222 PGM_INVL_ALL_VCPU_TLBS(pVM);
2223 return rc;
2224}
2225
2226
2227/**
2228 * Checks if a kind mismatch is really a page being reused
2229 * or if it's just normal remappings.
2230 *
2231 * @returns true if reused and the cached page (enmKind1) should be flushed
2232 * @returns false if not reused.
2233 * @param enmKind1 The kind of the cached page.
2234 * @param enmKind2 The kind of the requested page.
2235 */
2236static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2237{
2238 switch (enmKind1)
2239 {
2240 /*
2241 * Never reuse them. There is no remapping in non-paging mode.
2242 */
2243 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2244 case PGMPOOLKIND_32BIT_PD_PHYS:
2245 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2246 case PGMPOOLKIND_PAE_PD_PHYS:
2247 case PGMPOOLKIND_PAE_PDPT_PHYS:
2248 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2249 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2250 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2251 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2252 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2253 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2254 return false;
2255
2256 /*
2257 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2258 */
2259 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2260 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2262 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2263 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2264 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2265 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2267 case PGMPOOLKIND_32BIT_PD:
2268 case PGMPOOLKIND_PAE_PDPT:
2269 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2270 switch (enmKind2)
2271 {
2272 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2273 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2274 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2275 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2276 case PGMPOOLKIND_64BIT_PML4:
2277 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2278 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2279 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2280 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2281 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2282 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2285 return true;
2286 default:
2287 return false;
2288 }
2289
2290 /*
2291 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2292 */
2293 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2294 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2295 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2296 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2297 case PGMPOOLKIND_64BIT_PML4:
2298 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2299 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2300 switch (enmKind2)
2301 {
2302 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2303 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2304 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2305 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2306 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2307 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2308 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2310 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2311 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2312 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2313 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2314 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2315 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2316 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2317 return true;
2318 default:
2319 return false;
2320 }
2321
2322#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2323 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2324 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2325 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2326 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2327
2328 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2329 return false;
2330#endif
2331
2332 /*
2333 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2334 */
2335 case PGMPOOLKIND_ROOT_NESTED:
2336 return false;
2337
2338 default:
2339 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2340 }
2341}
2342
2343
2344/**
2345 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2346 *
2347 * @returns VBox status code.
2348 * @retval VINF_PGM_CACHED_PAGE on success.
2349 * @retval VERR_FILE_NOT_FOUND if not found.
2350 * @param pPool The pool.
2351 * @param GCPhys The GC physical address of the page we're gonna shadow.
2352 * @param enmKind The kind of mapping.
2353 * @param enmAccess Access type for the mapping (only relevant for big pages)
2354 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2355 * @param iUser The shadow page pool index of the user table. This is
2356 * NIL_PGMPOOL_IDX for root pages.
2357 * @param iUserTable The index into the user table (shadowed). Ignored if
2358 * root page
2359 * @param ppPage Where to store the pointer to the page.
2360 */
2361static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2362 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2363{
2364 /*
2365 * Look up the GCPhys in the hash.
2366 */
2367 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2368 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2369 if (i != NIL_PGMPOOL_IDX)
2370 {
2371 do
2372 {
2373 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2374 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2375 if (pPage->GCPhys == GCPhys)
2376 {
2377 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2378 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2379 && pPage->fA20Enabled == fA20Enabled)
2380 {
2381 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2382 * doesn't flush it in case there are no more free use records.
2383 */
2384 pgmPoolCacheUsed(pPool, pPage);
2385
2386 int rc = VINF_SUCCESS;
2387 if (iUser != NIL_PGMPOOL_IDX)
2388 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2389 if (RT_SUCCESS(rc))
2390 {
2391 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2392 *ppPage = pPage;
2393 if (pPage->cModifications)
2394 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2395 STAM_COUNTER_INC(&pPool->StatCacheHits);
2396 return VINF_PGM_CACHED_PAGE;
2397 }
2398 return rc;
2399 }
2400
2401 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2402 {
2403 /*
2404 * The kind is different. In some cases we should now flush the page
2405 * as it has been reused, but in most cases this is normal remapping
2406 * of PDs as PT or big pages using the GCPhys field in a slightly
2407 * different way than the other kinds.
2408 */
2409 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2410 {
2411 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2412 pgmPoolFlushPage(pPool, pPage);
2413 break;
2414 }
2415 }
2416 }
2417
2418 /* next */
2419 i = pPage->iNext;
2420 } while (i != NIL_PGMPOOL_IDX);
2421 }
2422
2423 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2424 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2425 return VERR_FILE_NOT_FOUND;
2426}
2427
2428
2429/**
2430 * Inserts a page into the cache.
2431 *
2432 * @param pPool The pool.
2433 * @param pPage The cached page.
2434 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2435 */
2436static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2437{
2438 /*
2439 * Insert into the GCPhys hash if the page is fit for that.
2440 */
2441 Assert(!pPage->fCached);
2442 if (fCanBeCached)
2443 {
2444 pPage->fCached = true;
2445 pgmPoolHashInsert(pPool, pPage);
2446 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2447 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2448 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2449 }
2450 else
2451 {
2452 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2453 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2454 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2455 }
2456
2457 /*
2458 * Insert at the head of the age list.
2459 */
2460 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2461 pPage->iAgeNext = pPool->iAgeHead;
2462 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2463 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2464 else
2465 pPool->iAgeTail = pPage->idx;
2466 pPool->iAgeHead = pPage->idx;
2467}
2468
2469
2470/**
2471 * Flushes a cached page.
2472 *
2473 * @param pPool The pool.
2474 * @param pPage The cached page.
2475 */
2476static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2477{
2478 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2479
2480 /*
2481 * Remove the page from the hash.
2482 */
2483 if (pPage->fCached)
2484 {
2485 pPage->fCached = false;
2486 pgmPoolHashRemove(pPool, pPage);
2487 }
2488 else
2489 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2490
2491 /*
2492 * Remove it from the age list.
2493 */
2494 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2495 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2496 else
2497 pPool->iAgeTail = pPage->iAgePrev;
2498 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2499 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2500 else
2501 pPool->iAgeHead = pPage->iAgeNext;
2502 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2503 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2504}
2505
2506
2507/**
2508 * Looks for pages sharing the monitor.
2509 *
2510 * @returns Pointer to the head page.
2511 * @returns NULL if not found.
2512 * @param pPool The Pool
2513 * @param pNewPage The page which is going to be monitored.
2514 */
2515static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2516{
2517 /*
2518 * Look up the GCPhys in the hash.
2519 */
2520 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2521 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2522 if (i == NIL_PGMPOOL_IDX)
2523 return NULL;
2524 do
2525 {
2526 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2527 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2528 && pPage != pNewPage)
2529 {
2530 switch (pPage->enmKind)
2531 {
2532 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2533 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2534 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2535 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2536 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2538 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2540 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2541 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2542 case PGMPOOLKIND_64BIT_PML4:
2543 case PGMPOOLKIND_32BIT_PD:
2544 case PGMPOOLKIND_PAE_PDPT:
2545#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2546 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2547 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2548 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2549#endif
2550 {
2551 /* find the head */
2552 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2553 {
2554 Assert(pPage->iMonitoredPrev != pPage->idx);
2555 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2556 }
2557 return pPage;
2558 }
2559
2560 /* ignore, no monitoring. */
2561 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2563 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2564 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2565 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2566 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2567 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2568 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2569 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2570 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2571 case PGMPOOLKIND_ROOT_NESTED:
2572 case PGMPOOLKIND_PAE_PD_PHYS:
2573 case PGMPOOLKIND_PAE_PDPT_PHYS:
2574 case PGMPOOLKIND_32BIT_PD_PHYS:
2575 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2576#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2577 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2578#endif
2579 break;
2580 default:
2581 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2582 }
2583 }
2584
2585 /* next */
2586 i = pPage->iNext;
2587 } while (i != NIL_PGMPOOL_IDX);
2588 return NULL;
2589}
2590
2591
2592/**
2593 * Enabled write monitoring of a guest page.
2594 *
2595 * @returns VBox status code.
2596 * @retval VINF_SUCCESS on success.
2597 * @param pPool The pool.
2598 * @param pPage The cached page.
2599 */
2600static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2601{
2602 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2603
2604 /*
2605 * Filter out the relevant kinds.
2606 */
2607 switch (pPage->enmKind)
2608 {
2609 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2610 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2611 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2612 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2613 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2615 case PGMPOOLKIND_64BIT_PML4:
2616 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2617 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2618 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2619 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2620 case PGMPOOLKIND_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PDPT:
2622 break;
2623
2624 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2625 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2626 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2627 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2628 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2629 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2630 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2631 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2632 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2633 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2634 case PGMPOOLKIND_ROOT_NESTED:
2635 /* Nothing to monitor here. */
2636 return VINF_SUCCESS;
2637
2638 case PGMPOOLKIND_32BIT_PD_PHYS:
2639 case PGMPOOLKIND_PAE_PDPT_PHYS:
2640 case PGMPOOLKIND_PAE_PD_PHYS:
2641 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2642 /* Nothing to monitor here. */
2643 return VINF_SUCCESS;
2644
2645#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2646 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2647 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2648 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2649 break;
2650
2651 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2652 /* Nothing to monitor here. */
2653 return VINF_SUCCESS;
2654#endif
2655
2656 default:
2657 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2658 }
2659
2660 /*
2661 * Install handler.
2662 */
2663 int rc;
2664 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2665 if (pPageHead)
2666 {
2667 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2668 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2669
2670#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2671 if (pPageHead->fDirty)
2672 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2673#endif
2674
2675 pPage->iMonitoredPrev = pPageHead->idx;
2676 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2677 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2678 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2679 pPageHead->iMonitoredNext = pPage->idx;
2680 rc = VINF_SUCCESS;
2681 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2682 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2683 }
2684 else
2685 {
2686 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2687 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2688
2689 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2690 PVMCC pVM = pPool->CTX_SUFF(pVM);
2691 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2692 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2693 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2694 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2695 * the heap size should suffice. */
2696 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2697 PVMCPU pVCpu = VMMGetCpu(pVM);
2698 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2699 }
2700 pPage->fMonitored = true;
2701 return rc;
2702}
2703
2704
2705/**
2706 * Disables write monitoring of a guest page.
2707 *
2708 * @returns VBox status code.
2709 * @retval VINF_SUCCESS on success.
2710 * @param pPool The pool.
2711 * @param pPage The cached page.
2712 */
2713static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2714{
2715 /*
2716 * Filter out the relevant kinds.
2717 */
2718 switch (pPage->enmKind)
2719 {
2720 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2722 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2723 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2724 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2725 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2726 case PGMPOOLKIND_64BIT_PML4:
2727 case PGMPOOLKIND_32BIT_PD:
2728 case PGMPOOLKIND_PAE_PDPT:
2729 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2730 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2731 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2733 break;
2734
2735 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2736 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2737 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2738 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2739 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2740 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2741 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2742 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2745 case PGMPOOLKIND_ROOT_NESTED:
2746 case PGMPOOLKIND_PAE_PD_PHYS:
2747 case PGMPOOLKIND_PAE_PDPT_PHYS:
2748 case PGMPOOLKIND_32BIT_PD_PHYS:
2749 /* Nothing to monitor here. */
2750 Assert(!pPage->fMonitored);
2751 return VINF_SUCCESS;
2752
2753#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2754 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2755 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2756 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2757 break;
2758
2759 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2760 /* Nothing to monitor here. */
2761 Assert(!pPage->fMonitored);
2762 return VINF_SUCCESS;
2763#endif
2764
2765 default:
2766 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2767 }
2768 Assert(pPage->fMonitored);
2769
2770 /*
2771 * Remove the page from the monitored list or uninstall it if last.
2772 */
2773 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2774 int rc;
2775 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2776 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2777 {
2778 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2779 {
2780 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2781 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2782 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2783
2784 AssertFatalRCSuccess(rc);
2785 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2786 }
2787 else
2788 {
2789 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2790 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2791 {
2792 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2793 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2794 }
2795 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2796 rc = VINF_SUCCESS;
2797 }
2798 }
2799 else
2800 {
2801 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2802 AssertFatalRC(rc);
2803 PVMCPU pVCpu = VMMGetCpu(pVM);
2804 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2805 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2806 }
2807 pPage->fMonitored = false;
2808
2809 /*
2810 * Remove it from the list of modified pages (if in it).
2811 */
2812 pgmPoolMonitorModifiedRemove(pPool, pPage);
2813
2814 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2815 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2816
2817 return rc;
2818}
2819
2820
2821/**
2822 * Inserts the page into the list of modified pages.
2823 *
2824 * @param pPool The pool.
2825 * @param pPage The page.
2826 */
2827void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2828{
2829 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2830 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2831 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2832 && pPool->iModifiedHead != pPage->idx,
2833 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2834 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2835 pPool->iModifiedHead, pPool->cModifiedPages));
2836
2837 pPage->iModifiedNext = pPool->iModifiedHead;
2838 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2839 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2840 pPool->iModifiedHead = pPage->idx;
2841 pPool->cModifiedPages++;
2842#ifdef VBOX_WITH_STATISTICS
2843 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2844 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2845#endif
2846}
2847
2848
2849/**
2850 * Removes the page from the list of modified pages and resets the
2851 * modification counter.
2852 *
2853 * @param pPool The pool.
2854 * @param pPage The page which is believed to be in the list of modified pages.
2855 */
2856static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2857{
2858 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2859 if (pPool->iModifiedHead == pPage->idx)
2860 {
2861 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2862 pPool->iModifiedHead = pPage->iModifiedNext;
2863 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2864 {
2865 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2866 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2867 }
2868 pPool->cModifiedPages--;
2869 }
2870 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2871 {
2872 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2873 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2874 {
2875 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2876 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2877 }
2878 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2879 pPool->cModifiedPages--;
2880 }
2881 else
2882 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2883 pPage->cModifications = 0;
2884}
2885
2886
2887/**
2888 * Zaps the list of modified pages, resetting their modification counters in the process.
2889 *
2890 * @param pVM The cross context VM structure.
2891 */
2892static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2893{
2894 PGM_LOCK_VOID(pVM);
2895 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2896 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2897
2898 unsigned cPages = 0; NOREF(cPages);
2899
2900#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2901 pgmPoolResetDirtyPages(pVM);
2902#endif
2903
2904 uint16_t idx = pPool->iModifiedHead;
2905 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2906 while (idx != NIL_PGMPOOL_IDX)
2907 {
2908 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2909 idx = pPage->iModifiedNext;
2910 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2911 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2912 pPage->cModifications = 0;
2913 Assert(++cPages);
2914 }
2915 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2916 pPool->cModifiedPages = 0;
2917 PGM_UNLOCK(pVM);
2918}
2919
2920
/**
 * Handle SyncCR3 pool tasks
 *
 * If PGM_SYNC_CLEAR_PGM_POOL is pending, ring-3 builds clear the whole pool
 * via pgmR3PoolClearAll, while other contexts set VMCPU_FF_PGM_SYNC_CR3 and
 * return VINF_PGM_SYNC_CR3.  Otherwise only the list of modified pages is
 * cleared.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if the pool tasks were handled here.
 * @retval  VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (only
 *          returned when not compiled for ring-3).
 * @param   pVCpu       The cross context virtual CPU structure.
 * @remark  Should only be used when monitoring is available, thus placed in
 *          the PGMPOOL_WITH_MONITORING \#ifdef.
 */
int pgmPoolSyncCR3(PVMCPUCC pVCpu)
{
    PVMCC pVM = pVCpu->CTX_SUFF(pVM);
    LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));

    /*
     * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
     * Occasionally we will have to clear all the shadow page tables because we wanted
     * to monitor a page which was mapped by too many shadowed page tables. This operation
     * is sometimes referred to as a 'lightweight flush'.
     *
     * Note: the 'else' further down pairs with whichever 'if' the preprocessor
     * selects here.
     */
# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
    if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
        pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
# else  /* !IN_RING3 */
    if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
    {
        Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
        VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */

        /* Make sure all other VCPUs return to ring 3. */
        if (pVM->cCpus > 1)
        {
            VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
            PGM_INVL_ALL_VCPU_TLBS(pVM);
        }
        return VINF_PGM_SYNC_CR3;
    }
# endif /* !IN_RING3 */
    else
    {
        pgmPoolMonitorModifiedClearAll(pVM);

        /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
        if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
        {
            Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
            /* Re-enter so the now pending clear request takes the first branch above. */
            return pgmPoolSyncCR3(pVCpu);
        }
    }
    return VINF_SUCCESS;
}
2973
2974
2975/**
2976 * Frees up at least one user entry.
2977 *
2978 * @returns VBox status code.
2979 * @retval VINF_SUCCESS if successfully added.
2980 *
2981 * @param pPool The pool.
2982 * @param iUser The user index.
2983 */
2984static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2985{
2986 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2987 /*
2988 * Just free cached pages in a braindead fashion.
2989 */
2990 /** @todo walk the age list backwards and free the first with usage. */
2991 int rc = VINF_SUCCESS;
2992 do
2993 {
2994 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2995 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2996 rc = rc2;
2997 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2998 return rc;
2999}
3000
3001
3002/**
3003 * Inserts a page into the cache.
3004 *
3005 * This will create user node for the page, insert it into the GCPhys
3006 * hash, and insert it into the age list.
3007 *
3008 * @returns VBox status code.
3009 * @retval VINF_SUCCESS if successfully added.
3010 *
3011 * @param pPool The pool.
3012 * @param pPage The cached page.
3013 * @param GCPhys The GC physical address of the page we're gonna shadow.
3014 * @param iUser The user index.
3015 * @param iUserTable The user table index.
3016 */
3017DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3018{
3019 int rc = VINF_SUCCESS;
3020 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3021
3022 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3023
3024 if (iUser != NIL_PGMPOOL_IDX)
3025 {
3026#ifdef VBOX_STRICT
3027 /*
3028 * Check that the entry doesn't already exists.
3029 */
3030 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3031 {
3032 uint16_t i = pPage->iUserHead;
3033 do
3034 {
3035 Assert(i < pPool->cMaxUsers);
3036 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3037 i = paUsers[i].iNext;
3038 } while (i != NIL_PGMPOOL_USER_INDEX);
3039 }
3040#endif
3041
3042 /*
3043 * Find free a user node.
3044 */
3045 uint16_t i = pPool->iUserFreeHead;
3046 if (i == NIL_PGMPOOL_USER_INDEX)
3047 {
3048 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3049 if (RT_FAILURE(rc))
3050 return rc;
3051 i = pPool->iUserFreeHead;
3052 }
3053
3054 /*
3055 * Unlink the user node from the free list,
3056 * initialize and insert it into the user list.
3057 */
3058 pPool->iUserFreeHead = paUsers[i].iNext;
3059 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3060 paUsers[i].iUser = iUser;
3061 paUsers[i].iUserTable = iUserTable;
3062 pPage->iUserHead = i;
3063 }
3064 else
3065 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3066
3067
3068 /*
3069 * Insert into cache and enable monitoring of the guest page if enabled.
3070 *
3071 * Until we implement caching of all levels, including the CR3 one, we'll
3072 * have to make sure we don't try monitor & cache any recursive reuse of
3073 * a monitored CR3 page. Because all windows versions are doing this we'll
3074 * have to be able to do combined access monitoring, CR3 + PT and
3075 * PD + PT (guest PAE).
3076 *
3077 * Update:
3078 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3079 */
3080 const bool fCanBeMonitored = true;
3081 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3082 if (fCanBeMonitored)
3083 {
3084 rc = pgmPoolMonitorInsert(pPool, pPage);
3085 AssertRC(rc);
3086 }
3087 return rc;
3088}
3089
3090
3091/**
3092 * Adds a user reference to a page.
3093 *
3094 * This will move the page to the head of the
3095 *
3096 * @returns VBox status code.
3097 * @retval VINF_SUCCESS if successfully added.
3098 *
3099 * @param pPool The pool.
3100 * @param pPage The cached page.
3101 * @param iUser The user index.
3102 * @param iUserTable The user table.
3103 */
3104static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3105{
3106 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3107 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3108 Assert(iUser != NIL_PGMPOOL_IDX);
3109
3110# ifdef VBOX_STRICT
3111 /*
3112 * Check that the entry doesn't already exists. We only allow multiple
3113 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3114 */
3115 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3116 {
3117 uint16_t i = pPage->iUserHead;
3118 do
3119 {
3120 Assert(i < pPool->cMaxUsers);
3121 /** @todo this assertion looks odd... Shouldn't it be && here? */
3122 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3123 i = paUsers[i].iNext;
3124 } while (i != NIL_PGMPOOL_USER_INDEX);
3125 }
3126# endif
3127
3128 /*
3129 * Allocate a user node.
3130 */
3131 uint16_t i = pPool->iUserFreeHead;
3132 if (i == NIL_PGMPOOL_USER_INDEX)
3133 {
3134 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3135 if (RT_FAILURE(rc))
3136 return rc;
3137 i = pPool->iUserFreeHead;
3138 }
3139 pPool->iUserFreeHead = paUsers[i].iNext;
3140
3141 /*
3142 * Initialize the user node and insert it.
3143 */
3144 paUsers[i].iNext = pPage->iUserHead;
3145 paUsers[i].iUser = iUser;
3146 paUsers[i].iUserTable = iUserTable;
3147 pPage->iUserHead = i;
3148
3149# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3150 if (pPage->fDirty)
3151 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3152# endif
3153
3154 /*
3155 * Tell the cache to update its replacement stats for this page.
3156 */
3157 pgmPoolCacheUsed(pPool, pPage);
3158 return VINF_SUCCESS;
3159}
3160
3161
3162/**
3163 * Frees a user record associated with a page.
3164 *
3165 * This does not clear the entry in the user table, it simply replaces the
3166 * user record to the chain of free records.
3167 *
3168 * @param pPool The pool.
3169 * @param pPage The shadow page.
3170 * @param iUser The shadow page pool index of the user table.
3171 * @param iUserTable The index into the user table (shadowed).
3172 *
3173 * @remarks Don't call this for root pages.
3174 */
3175static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3176{
3177 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3178 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3179 Assert(iUser != NIL_PGMPOOL_IDX);
3180
3181 /*
3182 * Unlink and free the specified user entry.
3183 */
3184
3185 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3186 uint16_t i = pPage->iUserHead;
3187 if ( i != NIL_PGMPOOL_USER_INDEX
3188 && paUsers[i].iUser == iUser
3189 && paUsers[i].iUserTable == iUserTable)
3190 {
3191 pPage->iUserHead = paUsers[i].iNext;
3192
3193 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3194 paUsers[i].iNext = pPool->iUserFreeHead;
3195 pPool->iUserFreeHead = i;
3196 return;
3197 }
3198
3199 /* General: Linear search. */
3200 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3201 while (i != NIL_PGMPOOL_USER_INDEX)
3202 {
3203 if ( paUsers[i].iUser == iUser
3204 && paUsers[i].iUserTable == iUserTable)
3205 {
3206 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3207 paUsers[iPrev].iNext = paUsers[i].iNext;
3208 else
3209 pPage->iUserHead = paUsers[i].iNext;
3210
3211 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3212 paUsers[i].iNext = pPool->iUserFreeHead;
3213 pPool->iUserFreeHead = i;
3214 return;
3215 }
3216 iPrev = i;
3217 i = paUsers[i].iNext;
3218 }
3219
3220 /* Fatal: didn't find it */
3221 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3222 iUser, iUserTable, pPage->GCPhys));
3223}
3224
3225
#if 0 /* unused */
/**
 * Gets the entry size of a shadow table.
 *
 * Currently compiled out by the surrounding \#if 0.
 *
 * @param   enmKind     The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 4 or 8.
 * @returns Kinds not listed below end up in the default case, which only
 *          raises AssertFatalMsgFailed; there is no return statement on
 *          that path.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds reported as having 4 byte entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
            return 4;

        /* Kinds reported as having 8 byte entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3276
#if 0 /* unused */
/**
 * Gets the entry size of a guest table.
 *
 * Currently compiled out by the surrounding \#if 0.
 *
 * @param   enmKind     The kind of page.
 *
 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
 * @returns For the *_PHYS and ROOT_NESTED kinds an assertion is raised
 *          (AssertFailed) and 0 is returned.  Any other unlisted kind ends
 *          up in the default case, which only raises AssertFatalMsgFailed.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* Kinds reported as having 4 byte guest entries. */
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            return 4;

        /* Kinds reported as having 8 byte guest entries. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
        case PGMPOOLKIND_PAE_PDPT:
            return 8;

        /* Kinds for which this function asserts and reports 0. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3331
3332
3333/**
3334 * Checks one shadow page table entry for a mapping of a physical page.
3335 *
3336 * @returns true / false indicating removal of all relevant PTEs
3337 *
3338 * @param pVM The cross context VM structure.
3339 * @param pPhysPage The guest page in question.
3340 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3341 * @param iShw The shadow page table.
3342 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3343 */
3344static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3345{
3346 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3347 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3348 bool fRet = false;
3349
3350 /*
3351 * Assert sanity.
3352 */
3353 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3354 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3355 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3356
3357 /*
3358 * Then, clear the actual mappings to the page in the shadow PT.
3359 */
3360 switch (pPage->enmKind)
3361 {
3362 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3364 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3365 {
3366 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3367 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3368 uint32_t u32AndMask = 0;
3369 uint32_t u32OrMask = 0;
3370
3371 if (!fFlushPTEs)
3372 {
3373 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3374 {
3375 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3376 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3377 u32OrMask = X86_PTE_RW;
3378 u32AndMask = UINT32_MAX;
3379 fRet = true;
3380 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3381 break;
3382
3383 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3384 u32OrMask = 0;
3385 u32AndMask = ~X86_PTE_RW;
3386 fRet = true;
3387 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3388 break;
3389 default:
3390 /* We will end up here when called with an "ALL" access handler. */
3391 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3392 break;
3393 }
3394 }
3395 else
3396 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3397
3398 /* Update the counter if we're removing references. */
3399 if (!u32AndMask)
3400 {
3401 Assert(pPage->cPresent);
3402 Assert(pPool->cPresent);
3403 pPage->cPresent--;
3404 pPool->cPresent--;
3405 }
3406
3407 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3408 {
3409 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3410 X86PTE Pte;
3411 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3412 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3413 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3414
3415 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3416 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3417 return fRet;
3418 }
3419#ifdef LOG_ENABLED
3420 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3421 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3422 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3423 {
3424 Log(("i=%d cFound=%d\n", i, ++cFound));
3425 }
3426#endif
3427 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3428 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3429 break;
3430 }
3431
3432 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3433 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3434 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3435 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3436 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3437 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3438#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3439 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3440#endif
3441 {
3442 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3443 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3444 uint64_t u64OrMask = 0;
3445 uint64_t u64AndMask = 0;
3446
3447 if (!fFlushPTEs)
3448 {
3449 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3450 {
3451 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3452 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3453 u64OrMask = X86_PTE_RW;
3454 u64AndMask = UINT64_MAX;
3455 fRet = true;
3456 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3457 break;
3458
3459 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3460 u64OrMask = 0;
3461 u64AndMask = ~(uint64_t)X86_PTE_RW;
3462 fRet = true;
3463 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3464 break;
3465
3466 default:
3467 /* We will end up here when called with an "ALL" access handler. */
3468 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3469 break;
3470 }
3471 }
3472 else
3473 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3474
3475 /* Update the counter if we're removing references. */
3476 if (!u64AndMask)
3477 {
3478 Assert(pPage->cPresent);
3479 Assert(pPool->cPresent);
3480 pPage->cPresent--;
3481 pPool->cPresent--;
3482 }
3483
3484 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3485 {
3486 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3487 X86PTEPAE Pte;
3488 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3489 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3490 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3491
3492 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3493 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3494 return fRet;
3495 }
3496#ifdef LOG_ENABLED
3497 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3498 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3499 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3500 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3501 Log(("i=%d cFound=%d\n", i, ++cFound));
3502#endif
3503 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3504 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3505 break;
3506 }
3507
3508#ifdef PGM_WITH_LARGE_PAGES
3509 /* Large page case only. */
3510 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3511 {
3512 Assert(pVM->pgm.s.fNestedPaging);
3513
3514 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3515 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3516
3517 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3518 {
3519 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3520 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3521 pPD->a[iPte].u = 0;
3522 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3523
3524 /* Update the counter as we're removing references. */
3525 Assert(pPage->cPresent);
3526 Assert(pPool->cPresent);
3527 pPage->cPresent--;
3528 pPool->cPresent--;
3529
3530 return fRet;
3531 }
3532# ifdef LOG_ENABLED
3533 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3534 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3535 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3536 Log(("i=%d cFound=%d\n", i, ++cFound));
3537# endif
3538 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3539 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3540 break;
3541 }
3542
3543 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3544 case PGMPOOLKIND_PAE_PD_PHYS:
3545 {
3546 Assert(pVM->pgm.s.fNestedPaging);
3547
3548 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3549 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3550
3551 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3552 {
3553 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3554 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3555 pPD->a[iPte].u = 0;
3556 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3557
3558 /* Update the counter as we're removing references. */
3559 Assert(pPage->cPresent);
3560 Assert(pPool->cPresent);
3561 pPage->cPresent--;
3562 pPool->cPresent--;
3563 return fRet;
3564 }
3565# ifdef LOG_ENABLED
3566 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3567 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3568 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3569 Log(("i=%d cFound=%d\n", i, ++cFound));
3570# endif
3571 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3572 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3573 break;
3574 }
3575#endif /* PGM_WITH_LARGE_PAGES */
3576
3577 default:
3578 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3579 }
3580
3581 /* not reached. */
3582#ifndef _MSC_VER
3583 return fRet;
3584#endif
3585}
3586
3587
3588/**
3589 * Scans one shadow page table for mappings of a physical page.
3590 *
3591 * @param pVM The cross context VM structure.
3592 * @param pPhysPage The guest page in question.
3593 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3594 * @param iShw The shadow page table.
3595 */
3596static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3597{
3598 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3599
3600 /* We should only come here with when there's only one reference to this physical page. */
3601 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3602
3603 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3604 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3605 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3606 if (!fKeptPTEs)
3607 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3608 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3609}
3610
3611
3612/**
3613 * Flushes a list of shadow page tables mapping the same physical page.
3614 *
3615 * @param pVM The cross context VM structure.
3616 * @param pPhysPage The guest page in question.
3617 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3618 * @param iPhysExt The physical cross reference extent list to flush.
3619 */
3620static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3621{
3622 PGM_LOCK_ASSERT_OWNER(pVM);
3623 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3624 bool fKeepList = false;
3625
3626 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3627 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3628
3629 const uint16_t iPhysExtStart = iPhysExt;
3630 PPGMPOOLPHYSEXT pPhysExt;
3631 do
3632 {
3633 Assert(iPhysExt < pPool->cMaxPhysExts);
3634 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3635 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3636 {
3637 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3638 {
3639 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3640 if (!fKeptPTEs)
3641 {
3642 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3643 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3644 }
3645 else
3646 fKeepList = true;
3647 }
3648 }
3649 /* next */
3650 iPhysExt = pPhysExt->iNext;
3651 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3652
3653 if (!fKeepList)
3654 {
3655 /* insert the list into the free list and clear the ram range entry. */
3656 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3657 pPool->iPhysExtFreeHead = iPhysExtStart;
3658 /* Invalidate the tracking data. */
3659 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3660 }
3661
3662 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3663}
3664
3665
3666/**
3667 * Flushes all shadow page table mappings of the given guest page.
3668 *
3669 * This is typically called when the host page backing the guest one has been
3670 * replaced or when the page protection was changed due to a guest access
3671 * caught by the monitoring.
3672 *
3673 * @returns VBox status code.
3674 * @retval VINF_SUCCESS if all references has been successfully cleared.
3675 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3676 * pool cleaning. FF and sync flags are set.
3677 *
3678 * @param pVM The cross context VM structure.
3679 * @param GCPhysPage GC physical address of the page in question
3680 * @param pPhysPage The guest page in question.
3681 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3682 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3683 * flushed, it is NOT touched if this isn't necessary.
3684 * The caller MUST initialized this to @a false.
3685 */
3686int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3687{
3688 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3689 PGM_LOCK_VOID(pVM);
3690 int rc = VINF_SUCCESS;
3691
3692#ifdef PGM_WITH_LARGE_PAGES
3693 /* Is this page part of a large page? */
3694 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3695 {
3696 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3697 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3698
3699 /* Fetch the large page base. */
3700 PPGMPAGE pLargePage;
3701 if (GCPhysBase != GCPhysPage)
3702 {
3703 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3704 AssertFatal(pLargePage);
3705 }
3706 else
3707 pLargePage = pPhysPage;
3708
3709 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3710
3711 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3712 {
3713 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3714 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3715 pVM->pgm.s.cLargePagesDisabled++;
3716
3717 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3718 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3719
3720 *pfFlushTLBs = true;
3721 PGM_UNLOCK(pVM);
3722 return rc;
3723 }
3724 }
3725#else
3726 NOREF(GCPhysPage);
3727#endif /* PGM_WITH_LARGE_PAGES */
3728
3729 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3730 if (u16)
3731 {
3732 /*
3733 * The zero page is currently screwing up the tracking and we'll
3734 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3735 * is defined, zero pages won't normally be mapped. Some kind of solution
3736 * will be needed for this problem of course, but it will have to wait...
3737 */
3738 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3739 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3740 rc = VINF_PGM_GCPHYS_ALIASED;
3741 else
3742 {
3743 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3744 {
3745 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3746 pgmPoolTrackFlushGCPhysPT(pVM,
3747 pPhysPage,
3748 fFlushPTEs,
3749 PGMPOOL_TD_GET_IDX(u16));
3750 }
3751 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3752 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3753 else
3754 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3755 *pfFlushTLBs = true;
3756 }
3757 }
3758
3759 if (rc == VINF_PGM_GCPHYS_ALIASED)
3760 {
3761 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3762 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3763 rc = VINF_PGM_SYNC_CR3;
3764 }
3765 PGM_UNLOCK(pVM);
3766 return rc;
3767}
3768
3769
3770/**
3771 * Scans all shadow page tables for mappings of a physical page.
3772 *
3773 * This may be slow, but it's most likely more efficient than cleaning
3774 * out the entire page pool / cache.
3775 *
3776 * @returns VBox status code.
3777 * @retval VINF_SUCCESS if all references has been successfully cleared.
3778 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3779 * a page pool cleaning.
3780 *
3781 * @param pVM The cross context VM structure.
3782 * @param pPhysPage The guest page in question.
3783 */
3784int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3785{
3786 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3787 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3788 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3789 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3790
3791 /*
3792 * There is a limit to what makes sense.
3793 */
3794 if ( pPool->cPresent > 1024
3795 && pVM->cCpus == 1)
3796 {
3797 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3798 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3799 return VINF_PGM_GCPHYS_ALIASED;
3800 }
3801
3802 /*
3803 * Iterate all the pages until we've encountered all that in use.
3804 * This is simple but not quite optimal solution.
3805 */
3806 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3807 unsigned cLeft = pPool->cUsedPages;
3808 unsigned iPage = pPool->cCurPages;
3809 while (--iPage >= PGMPOOL_IDX_FIRST)
3810 {
3811 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3812 if ( pPage->GCPhys != NIL_RTGCPHYS
3813 && pPage->cPresent)
3814 {
3815 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3816 switch (pPage->enmKind)
3817 {
3818 /*
3819 * We only care about shadow page tables.
3820 */
3821 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3822 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3823 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3824 {
3825 const uint32_t u32 = (uint32_t)u64;
3826 unsigned cPresent = pPage->cPresent;
3827 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3828 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3829 {
3830 const X86PGUINT uPte = pPT->a[i].u;
3831 if (uPte & X86_PTE_P)
3832 {
3833 if ((uPte & X86_PTE_PG_MASK) == u32)
3834 {
3835 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3836 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3837
3838 /* Update the counter as we're removing references. */
3839 Assert(pPage->cPresent);
3840 Assert(pPool->cPresent);
3841 pPage->cPresent--;
3842 pPool->cPresent--;
3843 }
3844 if (!--cPresent)
3845 break;
3846 }
3847 }
3848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3849 break;
3850 }
3851
3852 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3853 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3854 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3855 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3856 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3857 {
3858 unsigned cPresent = pPage->cPresent;
3859 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3860 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3861 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3862 {
3863 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3864 {
3865 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3866 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3867
3868 /* Update the counter as we're removing references. */
3869 Assert(pPage->cPresent);
3870 Assert(pPool->cPresent);
3871 pPage->cPresent--;
3872 pPool->cPresent--;
3873 }
3874 if (!--cPresent)
3875 break;
3876 }
3877 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3878 break;
3879 }
3880
3881 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3882 {
3883 unsigned cPresent = pPage->cPresent;
3884 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3885 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3886 {
3887 X86PGPAEUINT const uPte = pPT->a[i].u;
3888 if (uPte & EPT_E_READ)
3889 {
3890 if ((uPte & EPT_PTE_PG_MASK) == u64)
3891 {
3892 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3893 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3894
3895 /* Update the counter as we're removing references. */
3896 Assert(pPage->cPresent);
3897 Assert(pPool->cPresent);
3898 pPage->cPresent--;
3899 pPool->cPresent--;
3900 }
3901 if (!--cPresent)
3902 break;
3903 }
3904 }
3905 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3906 break;
3907 }
3908 }
3909
3910 if (!--cLeft)
3911 break;
3912 }
3913 }
3914
3915 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3916 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3917
3918 /*
3919 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3920 */
3921 if (pPool->cPresent > 1024)
3922 {
3923 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3924 return VINF_PGM_GCPHYS_ALIASED;
3925 }
3926
3927 return VINF_SUCCESS;
3928}
3929
3930
3931/**
3932 * Clears the user entry in a user table.
3933 *
3934 * This is used to remove all references to a page when flushing it.
3935 */
3936static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3937{
3938 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3939 Assert(pUser->iUser < pPool->cCurPages);
3940 uint32_t iUserTable = pUser->iUserTable;
3941
3942 /*
3943 * Map the user page. Ignore references made by fictitious pages.
3944 */
3945 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3946 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3947 union
3948 {
3949 uint64_t *pau64;
3950 uint32_t *pau32;
3951 } u;
3952 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3953 {
3954 Assert(!pUserPage->pvPageR3);
3955 return;
3956 }
3957 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3958
3959
3960 /* Safety precaution in case we change the paging for other modes too in the future. */
3961 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3962
3963#ifdef VBOX_STRICT
3964 /*
3965 * Some sanity checks.
3966 */
3967 switch (pUserPage->enmKind)
3968 {
3969 case PGMPOOLKIND_32BIT_PD:
3970 case PGMPOOLKIND_32BIT_PD_PHYS:
3971 Assert(iUserTable < X86_PG_ENTRIES);
3972 break;
3973 case PGMPOOLKIND_PAE_PDPT:
3974 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3975 case PGMPOOLKIND_PAE_PDPT_PHYS:
3976 Assert(iUserTable < 4);
3977 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3978 break;
3979 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3980 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3981 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3982 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3983 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3984 case PGMPOOLKIND_PAE_PD_PHYS:
3985 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3986 break;
3987 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3988 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3989 break;
3990 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3991 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3992 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3993 break;
3994 case PGMPOOLKIND_64BIT_PML4:
3995 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3996 /* GCPhys >> PAGE_SHIFT is the index here */
3997 break;
3998 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3999 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4000 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4001 break;
4002
4003 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4004 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4005 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4006 break;
4007
4008 case PGMPOOLKIND_ROOT_NESTED:
4009 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4010 break;
4011
4012# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4013 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4014 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4015 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4016 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4017 Assert(iUserTable < EPT_PG_ENTRIES);
4018 break;
4019# endif
4020
4021 default:
4022 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4023 break;
4024 }
4025#endif /* VBOX_STRICT */
4026
4027 /*
4028 * Clear the entry in the user page.
4029 */
4030 switch (pUserPage->enmKind)
4031 {
4032 /* 32-bit entries */
4033 case PGMPOOLKIND_32BIT_PD:
4034 case PGMPOOLKIND_32BIT_PD_PHYS:
4035 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4036 break;
4037
4038 /* 64-bit entries */
4039 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4040 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4041 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4042 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4043 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4044 case PGMPOOLKIND_PAE_PD_PHYS:
4045 case PGMPOOLKIND_PAE_PDPT_PHYS:
4046 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4047 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4048 case PGMPOOLKIND_64BIT_PML4:
4049 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4050 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4051 case PGMPOOLKIND_PAE_PDPT:
4052 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4053 case PGMPOOLKIND_ROOT_NESTED:
4054 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4055 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4056# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4057 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4058 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4059 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4060 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4061#endif
4062 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4063 break;
4064
4065 default:
4066 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4067 }
4068 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4069}
4070
4071
4072/**
4073 * Clears all users of a page.
4074 */
4075static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4076{
4077 /*
4078 * Free all the user records.
4079 */
4080 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4081
4082 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4083 uint16_t i = pPage->iUserHead;
4084 while (i != NIL_PGMPOOL_USER_INDEX)
4085 {
4086 /* Clear enter in user table. */
4087 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4088
4089 /* Free it. */
4090 const uint16_t iNext = paUsers[i].iNext;
4091 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4092 paUsers[i].iNext = pPool->iUserFreeHead;
4093 pPool->iUserFreeHead = i;
4094
4095 /* Next. */
4096 i = iNext;
4097 }
4098 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4099}
4100
4101
4102/**
4103 * Allocates a new physical cross reference extent.
4104 *
4105 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4106 * @param pVM The cross context VM structure.
4107 * @param piPhysExt Where to store the phys ext index.
4108 */
4109PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4110{
4111 PGM_LOCK_ASSERT_OWNER(pVM);
4112 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4113 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4114 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4115 {
4116 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4117 return NULL;
4118 }
4119 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4120 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4121 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4122 *piPhysExt = iPhysExt;
4123 return pPhysExt;
4124}
4125
4126
4127/**
4128 * Frees a physical cross reference extent.
4129 *
4130 * @param pVM The cross context VM structure.
4131 * @param iPhysExt The extent to free.
4132 */
4133void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4134{
4135 PGM_LOCK_ASSERT_OWNER(pVM);
4136 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4137 Assert(iPhysExt < pPool->cMaxPhysExts);
4138 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4139 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4140 {
4141 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4142 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4143 }
4144 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4145 pPool->iPhysExtFreeHead = iPhysExt;
4146}
4147
4148
4149/**
4150 * Frees a physical cross reference extent.
4151 *
4152 * @param pVM The cross context VM structure.
4153 * @param iPhysExt The extent to free.
4154 */
4155void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4156{
4157 PGM_LOCK_ASSERT_OWNER(pVM);
4158 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4159
4160 const uint16_t iPhysExtStart = iPhysExt;
4161 PPGMPOOLPHYSEXT pPhysExt;
4162 do
4163 {
4164 Assert(iPhysExt < pPool->cMaxPhysExts);
4165 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4166 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4167 {
4168 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4169 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4170 }
4171
4172 /* next */
4173 iPhysExt = pPhysExt->iNext;
4174 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4175
4176 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4177 pPool->iPhysExtFreeHead = iPhysExtStart;
4178}
4179
4180
4181/**
4182 * Insert a reference into a list of physical cross reference extents.
4183 *
4184 * @returns The new tracking data for PGMPAGE.
4185 *
4186 * @param pVM The cross context VM structure.
4187 * @param iPhysExt The physical extent index of the list head.
4188 * @param iShwPT The shadow page table index.
4189 * @param iPte Page table entry
4190 *
4191 */
4192static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4193{
4194 PGM_LOCK_ASSERT_OWNER(pVM);
4195 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4196 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4197
4198 /*
4199 * Special common cases.
4200 */
4201 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4202 {
4203 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4204 paPhysExts[iPhysExt].apte[1] = iPte;
4205 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4206 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4207 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4208 }
4209 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4210 {
4211 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4212 paPhysExts[iPhysExt].apte[2] = iPte;
4213 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4214 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4215 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4216 }
4217 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4218
4219 /*
4220 * General treatment.
4221 */
4222 const uint16_t iPhysExtStart = iPhysExt;
4223 unsigned cMax = 15;
4224 for (;;)
4225 {
4226 Assert(iPhysExt < pPool->cMaxPhysExts);
4227 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4228 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4229 {
4230 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4231 paPhysExts[iPhysExt].apte[i] = iPte;
4232 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4233 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4234 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4235 }
4236 if (!--cMax)
4237 {
4238 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4239 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4240 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4241 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4242 }
4243
4244 /* advance */
4245 iPhysExt = paPhysExts[iPhysExt].iNext;
4246 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4247 break;
4248 }
4249
4250 /*
4251 * Add another extent to the list.
4252 */
4253 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4254 if (!pNew)
4255 {
4256 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4257 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4258 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4259 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4260 }
4261 pNew->iNext = iPhysExtStart;
4262 pNew->aidx[0] = iShwPT;
4263 pNew->apte[0] = iPte;
4264 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4265 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4266}
4267
4268
4269/**
4270 * Add a reference to guest physical page where extents are in use.
4271 *
4272 * @returns The new tracking data for PGMPAGE.
4273 *
4274 * @param pVM The cross context VM structure.
4275 * @param pPhysPage Pointer to the aPages entry in the ram range.
4276 * @param u16 The ram range flags (top 16-bits).
4277 * @param iShwPT The shadow page table index.
4278 * @param iPte Page table entry
4279 */
4280uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4281{
4282 PGM_LOCK_VOID(pVM);
4283 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4284 {
4285 /*
4286 * Convert to extent list.
4287 */
4288 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4289 uint16_t iPhysExt;
4290 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4291 if (pPhysExt)
4292 {
4293 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4294 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4295 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4296 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4297 pPhysExt->aidx[1] = iShwPT;
4298 pPhysExt->apte[1] = iPte;
4299 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4300 }
4301 else
4302 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4303 }
4304 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4305 {
4306 /*
4307 * Insert into the extent list.
4308 */
4309 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4310 }
4311 else
4312 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4313 PGM_UNLOCK(pVM);
4314 return u16;
4315}
4316
4317
4318/**
4319 * Clear references to guest physical memory.
4320 *
4321 * @param pPool The pool.
4322 * @param pPage The page.
4323 * @param pPhysPage Pointer to the aPages entry in the ram range.
4324 * @param iPte Shadow PTE index
4325 */
4326void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4327{
4328 PVMCC pVM = pPool->CTX_SUFF(pVM);
4329 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4330 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4331
4332 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4333 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4334 {
4335 PGM_LOCK_VOID(pVM);
4336
4337 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4338 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4339 do
4340 {
4341 Assert(iPhysExt < pPool->cMaxPhysExts);
4342
4343 /*
4344 * Look for the shadow page and check if it's all freed.
4345 */
4346 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4347 {
4348 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4349 && paPhysExts[iPhysExt].apte[i] == iPte)
4350 {
4351 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4352 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4353
4354 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4355 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4356 {
4357 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4358 PGM_UNLOCK(pVM);
4359 return;
4360 }
4361
4362 /* we can free the node. */
4363 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4364 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4365 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4366 {
4367 /* lonely node */
4368 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4369 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4370 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4371 }
4372 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4373 {
4374 /* head */
4375 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4376 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4377 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4378 }
4379 else
4380 {
4381 /* in list */
4382 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4383 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4384 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4385 }
4386 iPhysExt = iPhysExtNext;
4387 PGM_UNLOCK(pVM);
4388 return;
4389 }
4390 }
4391
4392 /* next */
4393 iPhysExtPrev = iPhysExt;
4394 iPhysExt = paPhysExts[iPhysExt].iNext;
4395 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4396
4397 PGM_UNLOCK(pVM);
4398 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4399 }
4400 else /* nothing to do */
4401 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4402}
4403
4404/**
4405 * Clear references to guest physical memory.
4406 *
4407 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4408 * physical address is assumed to be correct, so the linear search can be
4409 * skipped and we can assert at an earlier point.
4410 *
4411 * @param pPool The pool.
4412 * @param pPage The page.
4413 * @param HCPhys The host physical address corresponding to the guest page.
4414 * @param GCPhys The guest physical address corresponding to HCPhys.
4415 * @param iPte Shadow PTE index
4416 */
4417static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4418{
4419 /*
4420 * Lookup the page and check if it checks out before derefing it.
4421 */
4422 PVMCC pVM = pPool->CTX_SUFF(pVM);
4423 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4424 if (pPhysPage)
4425 {
4426 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4427#ifdef LOG_ENABLED
4428 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4429 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4430#endif
4431 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4432 {
4433 Assert(pPage->cPresent);
4434 Assert(pPool->cPresent);
4435 pPage->cPresent--;
4436 pPool->cPresent--;
4437 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4438 return;
4439 }
4440
4441 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4442 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4443 }
4444 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4445}
4446
4447
4448/**
4449 * Clear references to guest physical memory.
4450 *
4451 * @param pPool The pool.
4452 * @param pPage The page.
4453 * @param HCPhys The host physical address corresponding to the guest page.
4454 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4455 * @param iPte Shadow pte index
4456 */
4457void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4458{
4459 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4460
4461 /*
4462 * Try the hint first.
4463 */
4464 RTHCPHYS HCPhysHinted;
4465 PVMCC pVM = pPool->CTX_SUFF(pVM);
4466 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4467 if (pPhysPage)
4468 {
4469 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4470 Assert(HCPhysHinted);
4471 if (HCPhysHinted == HCPhys)
4472 {
4473 Assert(pPage->cPresent);
4474 Assert(pPool->cPresent);
4475 pPage->cPresent--;
4476 pPool->cPresent--;
4477 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4478 return;
4479 }
4480 }
4481 else
4482 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4483
4484 /*
4485 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4486 */
4487 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4488 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4489 while (pRam)
4490 {
4491 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4492 while (iPage-- > 0)
4493 {
4494 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4495 {
4496 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4497 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4498 Assert(pPage->cPresent);
4499 Assert(pPool->cPresent);
4500 pPage->cPresent--;
4501 pPool->cPresent--;
4502 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4503 return;
4504 }
4505 }
4506 pRam = pRam->CTX_SUFF(pNext);
4507 }
4508
4509 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4510}
4511
4512
4513/**
4514 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4515 *
4516 * @param pPool The pool.
4517 * @param pPage The page.
4518 * @param pShwPT The shadow page table (mapping of the page).
4519 * @param pGstPT The guest page table.
4520 */
4521DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4522{
4523 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4524 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4525 {
4526 const X86PGUINT uPte = pShwPT->a[i].u;
4527 Assert(!(uPte & RT_BIT_32(10)));
4528 if (uPte & X86_PTE_P)
4529 {
4530 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4531 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4532 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4533 if (!pPage->cPresent)
4534 break;
4535 }
4536 }
4537}
4538
4539
4540/**
4541 * Clear references to guest physical memory in a PAE / 32-bit page table.
4542 *
4543 * @param pPool The pool.
4544 * @param pPage The page.
4545 * @param pShwPT The shadow page table (mapping of the page).
4546 * @param pGstPT The guest page table (just a half one).
4547 */
4548DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4549{
4550 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4551 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4552 {
4553 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4554 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4555 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4556 {
4557 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4558 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4559 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4560 if (!pPage->cPresent)
4561 break;
4562 }
4563 }
4564}
4565
4566
4567/**
4568 * Clear references to guest physical memory in a PAE / PAE page table.
4569 *
4570 * @param pPool The pool.
4571 * @param pPage The page.
4572 * @param pShwPT The shadow page table (mapping of the page).
4573 * @param pGstPT The guest page table.
4574 */
4575DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4576{
4577 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4578 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4579 {
4580 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4581 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4582 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4583 {
4584 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4585 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4586 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4587 if (!pPage->cPresent)
4588 break;
4589 }
4590 }
4591}
4592
4593
4594/**
4595 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4596 *
4597 * @param pPool The pool.
4598 * @param pPage The page.
4599 * @param pShwPT The shadow page table (mapping of the page).
4600 */
4601DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4602{
4603 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4604 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4605 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4606 {
4607 const X86PGUINT uPte = pShwPT->a[i].u;
4608 Assert(!(uPte & RT_BIT_32(10)));
4609 if (uPte & X86_PTE_P)
4610 {
4611 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4612 i, uPte & X86_PTE_PG_MASK, GCPhys));
4613 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4614 if (!pPage->cPresent)
4615 break;
4616 }
4617 }
4618}
4619
4620
4621/**
4622 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4623 *
4624 * @param pPool The pool.
4625 * @param pPage The page.
4626 * @param pShwPT The shadow page table (mapping of the page).
4627 */
4628DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4629{
4630 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4631 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4632 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4633 {
4634 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4635 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4636 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4637 {
4638 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4639 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4640 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4641 if (!pPage->cPresent)
4642 break;
4643 }
4644 }
4645}
4646
4647
4648/**
4649 * Clear references to shadowed pages in an EPT page table.
4650 *
4651 * @param pPool The pool.
4652 * @param pPage The page.
4653 * @param pShwPT The shadow page directory pointer table (mapping of the
4654 * page).
4655 */
4656DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4657{
4658 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4659 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4660 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4661 {
4662 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4663 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4664 if (uPte & EPT_E_READ)
4665 {
4666 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4667 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4668 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4669 if (!pPage->cPresent)
4670 break;
4671 }
4672 }
4673}
4674
4675
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
/**
 * Drops the page references held by a SLAT (nested) EPT shadow page table.
 *
 * The address for each present shadow entry is taken from the guest EPT entry
 * at the same index.
 *
 * @param   pPool       The pool.
 * @param   pPage       The page.
 * @param   pShwPT      The shadow page table (mapping of the page).
 * @param   pGstPT      The guest page table.
 */
DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
{
    Assert(PGMPOOL_PAGE_IS_NESTED(pPage));

    unsigned iEntry = pPage->iFirstPresent;
    while (iEntry < RT_ELEMENTS(pShwPT->a))
    {
        X86PGPAEUINT const uShwPte = pShwPT->a[iEntry].u;
        Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
        if (uShwPte & EPT_PRESENT_MASK)
        {
            Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[iEntry].u));
            pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[iEntry].u & EPT_PTE_PG_MASK, iEntry);

            /* Done as soon as no present entries remain. */
            if (!pPage->cPresent)
                break;
        }
        iEntry++;
    }
}
#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4702
4703
4704/**
4705 * Clear references to shadowed pages in a 32 bits page directory.
4706 *
4707 * @param pPool The pool.
4708 * @param pPage The page.
4709 * @param pShwPD The shadow page directory (mapping of the page).
4710 */
4711DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4712{
4713 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4714 {
4715 X86PGUINT const uPde = pShwPD->a[i].u;
4716 if (uPde & X86_PDE_P)
4717 {
4718 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4719 if (pSubPage)
4720 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4721 else
4722 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4723 }
4724 }
4725}
4726
4727
4728/**
4729 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4730 *
4731 * @param pPool The pool.
4732 * @param pPage The page.
4733 * @param pShwPD The shadow page directory (mapping of the page).
4734 */
4735DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4736{
4737 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4738 {
4739 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4740 if (uPde & X86_PDE_P)
4741 {
4742#ifdef PGM_WITH_LARGE_PAGES
4743 if (uPde & X86_PDE_PS)
4744 {
4745 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4746 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4747 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4748 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4749 i);
4750 }
4751 else
4752#endif
4753 {
4754 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4755 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4756 if (pSubPage)
4757 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4758 else
4759 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4760 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4761 }
4762 }
4763 }
4764}
4765
4766
4767/**
4768 * Clear references to shadowed pages in a PAE page directory pointer table.
4769 *
4770 * @param pPool The pool.
4771 * @param pPage The page.
4772 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4773 */
4774DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4775{
4776 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4777 {
4778 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4779 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4780 if (uPdpe & X86_PDPE_P)
4781 {
4782 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4783 if (pSubPage)
4784 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4785 else
4786 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4787 }
4788 }
4789}
4790
4791
4792/**
4793 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4794 *
4795 * @param pPool The pool.
4796 * @param pPage The page.
4797 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4798 */
4799DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4800{
4801 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4802 {
4803 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4804 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4805 if (uPdpe & X86_PDPE_P)
4806 {
4807 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4808 if (pSubPage)
4809 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4810 else
4811 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4812 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4813 }
4814 }
4815}
4816
4817
4818/**
4819 * Clear references to shadowed pages in a 64-bit level 4 page table.
4820 *
4821 * @param pPool The pool.
4822 * @param pPage The page.
4823 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4824 */
4825DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4826{
4827 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4828 {
4829 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4830 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4831 if (uPml4e & X86_PML4E_P)
4832 {
4833 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4834 if (pSubPage)
4835 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4836 else
4837 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4838 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4839 }
4840 }
4841}
4842
4843
4844/**
4845 * Clear references to shadowed pages in an EPT page directory.
4846 *
4847 * @param pPool The pool.
4848 * @param pPage The page.
4849 * @param pShwPD The shadow page directory (mapping of the page).
4850 */
4851DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4852{
4853 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4854 {
4855 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4856#ifdef PGM_WITH_LARGE_PAGES
4857 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4858#else
4859 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4860#endif
4861 if (uPde & EPT_E_READ)
4862 {
4863#ifdef PGM_WITH_LARGE_PAGES
4864 if (uPde & EPT_E_LEAF)
4865 {
4866 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4867 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4868 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* We don't support large guest EPT yet. */
4869 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4870 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4871 i);
4872 }
4873 else
4874#endif
4875 {
4876 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4877 if (pSubPage)
4878 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4879 else
4880 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4881 }
4882 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4883 }
4884 }
4885}
4886
4887
4888/**
4889 * Clear references to shadowed pages in an EPT page directory pointer table.
4890 *
4891 * @param pPool The pool.
4892 * @param pPage The page.
4893 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4894 */
4895DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4896{
4897 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4898 {
4899 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4900 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4901 if (uPdpe & EPT_E_READ)
4902 {
4903 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4904 if (pSubPage)
4905 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4906 else
4907 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4908 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4909 }
4910 }
4911}
4912
4913
4914/**
4915 * Clears all references made by this page.
4916 *
4917 * This includes other shadow pages and GC physical addresses.
4918 *
4919 * @param pPool The pool.
4920 * @param pPage The page.
4921 */
4922static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4923{
4924 /*
4925 * Map the shadow page and take action according to the page kind.
4926 */
4927 PVMCC pVM = pPool->CTX_SUFF(pVM);
4928 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4929 switch (pPage->enmKind)
4930 {
4931 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4932 {
4933 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4934 void *pvGst;
4935 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4936 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4937 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4938 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4939 break;
4940 }
4941
4942 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4943 {
4944 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4945 void *pvGst;
4946 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4947 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4948 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4949 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4950 break;
4951 }
4952
4953 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4954 {
4955 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4956 void *pvGst;
4957 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4958 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4959 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4960 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4961 break;
4962 }
4963
4964 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4965 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4966 {
4967 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4968 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4969 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4970 break;
4971 }
4972
4973 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4974 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4975 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4976 {
4977 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4978 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4979 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4980 break;
4981 }
4982
4983 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4984 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4985 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4986 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4987 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4988 case PGMPOOLKIND_PAE_PD_PHYS:
4989 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4990 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4991 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4992 break;
4993
4994 case PGMPOOLKIND_32BIT_PD_PHYS:
4995 case PGMPOOLKIND_32BIT_PD:
4996 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4997 break;
4998
4999 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5000 case PGMPOOLKIND_PAE_PDPT:
5001 case PGMPOOLKIND_PAE_PDPT_PHYS:
5002 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5003 break;
5004
5005 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5006 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5007 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5008 break;
5009
5010 case PGMPOOLKIND_64BIT_PML4:
5011 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5012 break;
5013
5014 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5015 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5016 break;
5017
5018 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5019 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5020 break;
5021
5022 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5023 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5024 break;
5025
5026#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5027 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5028 {
5029 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5030 void *pvGst;
5031 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5032 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5033 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5034 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5035 break;
5036 }
5037
5038 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5039 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5040 break;
5041
5042 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5043 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5044 break;
5045#endif
5046
5047 default:
5048 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5049 }
5050
5051 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
5052 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5053 ASMMemZeroPage(pvShw);
5054 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5055 pPage->fZeroed = true;
5056 Assert(!pPage->cPresent);
5057 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5058}
5059
5060
5061/**
5062 * Flushes a pool page.
5063 *
5064 * This moves the page to the free list after removing all user references to it.
5065 *
5066 * @returns VBox status code.
5067 * @retval VINF_SUCCESS on success.
5068 * @param pPool The pool.
5069 * @param pPage The shadow page.
5070 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
5071 */
5072int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5073{
5074 PVMCC pVM = pPool->CTX_SUFF(pVM);
5075 bool fFlushRequired = false;
5076
5077 int rc = VINF_SUCCESS;
5078 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5079 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5080 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5081
5082 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5083 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5084 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5085
5086 /*
5087 * Reject any attempts at flushing any of the special root pages (shall
5088 * not happen).
5089 */
5090 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5091 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5092 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5093 VINF_SUCCESS);
5094
5095 PGM_LOCK_VOID(pVM);
5096
5097 /*
5098 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5099 */
5100 if (pgmPoolIsPageLocked(pPage))
5101 {
5102 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5103 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5104 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5105 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5106 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5107 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5108 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5109 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5110 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5111 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5112 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5113 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5114 PGM_UNLOCK(pVM);
5115 return VINF_SUCCESS;
5116 }
5117
5118 /*
5119 * Mark the page as being in need of an ASMMemZeroPage().
5120 */
5121 pPage->fZeroed = false;
5122
5123#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5124 if (pPage->fDirty)
5125 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5126#endif
5127
5128 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5129 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5130 fFlushRequired = true;
5131
5132 /*
5133 * Clear the page.
5134 */
5135 pgmPoolTrackClearPageUsers(pPool, pPage);
5136 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5137 pgmPoolTrackDeref(pPool, pPage);
5138 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5139
5140 /*
5141 * Flush it from the cache.
5142 */
5143 pgmPoolCacheFlushPage(pPool, pPage);
5144
5145 /*
5146 * Deregistering the monitoring.
5147 */
5148 if (pPage->fMonitored)
5149 rc = pgmPoolMonitorFlush(pPool, pPage);
5150
5151 /*
5152 * Free the page.
5153 */
5154 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5155 pPage->iNext = pPool->iFreeHead;
5156 pPool->iFreeHead = pPage->idx;
5157 pPage->enmKind = PGMPOOLKIND_FREE;
5158 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5159 pPage->GCPhys = NIL_RTGCPHYS;
5160 pPage->fReusedFlushPending = false;
5161
5162 pPool->cUsedPages--;
5163
5164 /* Flush the TLBs of all VCPUs if required. */
5165 if ( fFlushRequired
5166 && fFlush)
5167 {
5168 PGM_INVL_ALL_VCPU_TLBS(pVM);
5169 }
5170
5171 PGM_UNLOCK(pVM);
5172 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5173 return rc;
5174}
5175
5176
5177/**
5178 * Frees a usage of a pool page.
5179 *
5180 * The caller is responsible to updating the user table so that it no longer
5181 * references the shadow page.
5182 *
5183 * @param pPool The pool.
5184 * @param pPage The shadow page.
5185 * @param iUser The shadow page pool index of the user table.
5186 * NIL_PGMPOOL_IDX for root pages.
5187 * @param iUserTable The index into the user table (shadowed). Ignored if
5188 * root page.
5189 */
5190void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5191{
5192 PVMCC pVM = pPool->CTX_SUFF(pVM);
5193
5194 STAM_PROFILE_START(&pPool->StatFree, a);
5195 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5196 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5197 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5198
5199 PGM_LOCK_VOID(pVM);
5200 if (iUser != NIL_PGMPOOL_IDX)
5201 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5202 if (!pPage->fCached)
5203 pgmPoolFlushPage(pPool, pPage);
5204 PGM_UNLOCK(pVM);
5205 STAM_PROFILE_STOP(&pPool->StatFree, a);
5206}
5207
5208
5209/**
5210 * Makes one or more free page free.
5211 *
5212 * @returns VBox status code.
5213 * @retval VINF_SUCCESS on success.
5214 *
5215 * @param pPool The pool.
5216 * @param enmKind Page table kind
5217 * @param iUser The user of the page.
5218 */
5219static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5220{
5221 PVMCC pVM = pPool->CTX_SUFF(pVM);
5222 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5223 NOREF(enmKind);
5224
5225 /*
5226 * If the pool isn't full grown yet, expand it.
5227 */
5228 if (pPool->cCurPages < pPool->cMaxPages)
5229 {
5230 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5231#ifdef IN_RING3
5232 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5233#else
5234 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5235#endif
5236 if (RT_FAILURE(rc))
5237 return rc;
5238 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5239 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5240 return VINF_SUCCESS;
5241 }
5242
5243 /*
5244 * Free one cached page.
5245 */
5246 return pgmPoolCacheFreeOne(pPool, iUser);
5247}
5248
5249
5250/**
5251 * Allocates a page from the pool.
5252 *
5253 * This page may actually be a cached page and not in need of any processing
5254 * on the callers part.
5255 *
5256 * @returns VBox status code.
5257 * @retval VINF_SUCCESS if a NEW page was allocated.
5258 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5259 *
5260 * @param pVM The cross context VM structure.
5261 * @param GCPhys The GC physical address of the page we're gonna shadow.
5262 * For 4MB and 2MB PD entries, it's the first address the
5263 * shadow PT is covering.
5264 * @param enmKind The kind of mapping.
5265 * @param enmAccess Access type for the mapping (only relevant for big pages)
5266 * @param fA20Enabled Whether the A20 gate is enabled or not.
5267 * @param iUser The shadow page pool index of the user table. Root
5268 * pages should pass NIL_PGMPOOL_IDX.
5269 * @param iUserTable The index into the user table (shadowed). Ignored for
5270 * root pages (iUser == NIL_PGMPOOL_IDX).
5271 * @param fLockPage Lock the page
5272 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5273 */
5274int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5275 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5276{
5277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5278 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5279 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5280 *ppPage = NULL;
5281 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5282 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5283 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5284
5285#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5286 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5287 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5288#endif
5289
5290 PGM_LOCK_VOID(pVM);
5291
5292 if (pPool->fCacheEnabled)
5293 {
5294 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5295 if (RT_SUCCESS(rc2))
5296 {
5297 if (fLockPage)
5298 pgmPoolLockPage(pPool, *ppPage);
5299 PGM_UNLOCK(pVM);
5300 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5301 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5302 return rc2;
5303 }
5304 }
5305
5306 /*
5307 * Allocate a new one.
5308 */
5309 int rc = VINF_SUCCESS;
5310 uint16_t iNew = pPool->iFreeHead;
5311 if (iNew == NIL_PGMPOOL_IDX)
5312 {
5313 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5314 if (RT_FAILURE(rc))
5315 {
5316 PGM_UNLOCK(pVM);
5317 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5318 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5319 return rc;
5320 }
5321 iNew = pPool->iFreeHead;
5322 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5323 }
5324
5325 /* unlink the free head */
5326 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5327 pPool->iFreeHead = pPage->iNext;
5328 pPage->iNext = NIL_PGMPOOL_IDX;
5329
5330 /*
5331 * Initialize it.
5332 */
5333 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5334 pPage->enmKind = enmKind;
5335 pPage->enmAccess = enmAccess;
5336 pPage->GCPhys = GCPhys;
5337 pPage->fA20Enabled = fA20Enabled;
5338 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5339 pPage->fMonitored = false;
5340 pPage->fCached = false;
5341 pPage->fDirty = false;
5342 pPage->fReusedFlushPending = false;
5343 pPage->cModifications = 0;
5344 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5345 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5346 pPage->cPresent = 0;
5347 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5348 pPage->idxDirtyEntry = 0;
5349 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5350 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5351 pPage->cLastAccessHandler = 0;
5352 pPage->cLocked = 0;
5353# ifdef VBOX_STRICT
5354 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5355# endif
5356
5357 /*
5358 * Insert into the tracking and cache. If this fails, free the page.
5359 */
5360 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5361 if (RT_FAILURE(rc3))
5362 {
5363 pPool->cUsedPages--;
5364 pPage->enmKind = PGMPOOLKIND_FREE;
5365 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5366 pPage->GCPhys = NIL_RTGCPHYS;
5367 pPage->iNext = pPool->iFreeHead;
5368 pPool->iFreeHead = pPage->idx;
5369 PGM_UNLOCK(pVM);
5370 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5371 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5372 return rc3;
5373 }
5374
5375 /*
5376 * Commit the allocation, clear the page and return.
5377 */
5378#ifdef VBOX_WITH_STATISTICS
5379 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5380 pPool->cUsedPagesHigh = pPool->cUsedPages;
5381#endif
5382
5383 if (!pPage->fZeroed)
5384 {
5385 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5386 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5387 ASMMemZeroPage(pv);
5388 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5389 }
5390
5391 *ppPage = pPage;
5392 if (fLockPage)
5393 pgmPoolLockPage(pPool, pPage);
5394 PGM_UNLOCK(pVM);
5395 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5396 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5397 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5398 return rc;
5399}
5400
5401
5402/**
5403 * Frees a usage of a pool page.
5404 *
5405 * @param pVM The cross context VM structure.
5406 * @param HCPhys The HC physical address of the shadow page.
5407 * @param iUser The shadow page pool index of the user table.
5408 * NIL_PGMPOOL_IDX if root page.
5409 * @param iUserTable The index into the user table (shadowed). Ignored if
5410 * root page.
5411 */
5412void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5413{
5414 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5415 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5416 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5417}
5418
5419
5420/**
5421 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5422 *
5423 * @returns Pointer to the shadow page structure.
5424 * @param pPool The pool.
5425 * @param HCPhys The HC physical address of the shadow page.
5426 */
5427PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5428{
5429 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5430
5431 /*
5432 * Look up the page.
5433 */
5434 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5435
5436 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5437 return pPage;
5438}
5439
5440
5441/**
5442 * Internal worker for finding a page for debugging purposes, no assertions.
5443 *
5444 * @returns Pointer to the shadow page structure. NULL on if not found.
5445 * @param pPool The pool.
5446 * @param HCPhys The HC physical address of the shadow page.
5447 */
5448PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5449{
5450 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5451 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5452}
5453
5454
5455/**
5456 * Internal worker for PGM_HCPHYS_2_PTR.
5457 *
5458 * @returns VBox status code.
5459 * @param pVM The cross context VM structure.
5460 * @param HCPhys The HC physical address of the shadow page.
5461 * @param ppv Where to return the address.
5462 */
5463int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5464{
5465 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5466 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5467 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5468 VERR_PGM_POOL_GET_PAGE_FAILED);
5469 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5470 return VINF_SUCCESS;
5471}
5472
5473#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5474
5475/**
5476 * Flush the specified page if present
5477 *
5478 * @param pVM The cross context VM structure.
5479 * @param GCPhys Guest physical address of the page to flush
5480 */
5481void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5482{
5483 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5484
5485 VM_ASSERT_EMT(pVM);
5486
5487 /*
5488 * Look up the GCPhys in the hash.
5489 */
5490 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5491 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5492 if (i == NIL_PGMPOOL_IDX)
5493 return;
5494
5495 do
5496 {
5497 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5498 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5499 {
5500 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5501 switch (pPage->enmKind)
5502 {
5503 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5504 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5505 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5506 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5507 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5508 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5509 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5510 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5511 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5512 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5513 case PGMPOOLKIND_64BIT_PML4:
5514 case PGMPOOLKIND_32BIT_PD:
5515 case PGMPOOLKIND_PAE_PDPT:
5516 {
5517 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5518# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5519 if (pPage->fDirty)
5520 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5521 else
5522# endif
5523 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5524 Assert(!pgmPoolIsPageLocked(pPage));
5525 pgmPoolMonitorChainFlush(pPool, pPage);
5526 return;
5527 }
5528
5529 /* ignore, no monitoring. */
5530 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5531 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5532 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5533 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5534 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5535 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5536 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5537 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5538 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5539 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5540 case PGMPOOLKIND_ROOT_NESTED:
5541 case PGMPOOLKIND_PAE_PD_PHYS:
5542 case PGMPOOLKIND_PAE_PDPT_PHYS:
5543 case PGMPOOLKIND_32BIT_PD_PHYS:
5544 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5545 break;
5546
5547 default:
5548 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5549 }
5550 }
5551
5552 /* next */
5553 i = pPage->iNext;
5554 } while (i != NIL_PGMPOOL_IDX);
5555 return;
5556}
5557
5558
5559/**
5560 * Reset CPU on hot plugging.
5561 *
5562 * @param pVM The cross context VM structure.
5563 * @param pVCpu The cross context virtual CPU structure.
5564 */
5565void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5566{
5567 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5568
5569 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5570 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5571 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5572}
5573
5574
5575/**
5576 * Flushes the entire cache.
5577 *
5578 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5579 * this and execute this CR3 flush.
5580 *
5581 * @param pVM The cross context VM structure.
5582 */
5583void pgmR3PoolReset(PVM pVM)
5584{
5585 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5586
5587 PGM_LOCK_ASSERT_OWNER(pVM);
5588 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5589 LogFlow(("pgmR3PoolReset:\n"));
5590
5591 /*
5592 * If there are no pages in the pool, there is nothing to do.
5593 */
5594 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5595 {
5596 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5597 return;
5598 }
5599
5600 /*
5601 * Exit the shadow mode since we're going to clear everything,
5602 * including the root page.
5603 */
5604 VMCC_FOR_EACH_VMCPU(pVM)
5605 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5606 VMCC_FOR_EACH_VMCPU_END(pVM);
5607
5608
5609 /*
5610 * Nuke the free list and reinsert all pages into it.
5611 */
5612 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5613 {
5614 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5615
5616 if (pPage->fMonitored)
5617 pgmPoolMonitorFlush(pPool, pPage);
5618 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5619 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5620 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5621 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5622 pPage->GCPhys = NIL_RTGCPHYS;
5623 pPage->enmKind = PGMPOOLKIND_FREE;
5624 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5625 Assert(pPage->idx == i);
5626 pPage->iNext = i + 1;
5627 pPage->fA20Enabled = true;
5628 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5629 pPage->fSeenNonGlobal = false;
5630 pPage->fMonitored = false;
5631 pPage->fDirty = false;
5632 pPage->fCached = false;
5633 pPage->fReusedFlushPending = false;
5634 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5635 pPage->cPresent = 0;
5636 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5637 pPage->cModifications = 0;
5638 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5639 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5640 pPage->idxDirtyEntry = 0;
5641 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5642 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5643 pPage->cLastAccessHandler = 0;
5644 pPage->cLocked = 0;
5645# ifdef VBOX_STRICT
5646 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5647# endif
5648 }
5649 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5650 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5651 pPool->cUsedPages = 0;
5652
5653 /*
5654 * Zap and reinitialize the user records.
5655 */
5656 pPool->cPresent = 0;
5657 pPool->iUserFreeHead = 0;
5658 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5659 const unsigned cMaxUsers = pPool->cMaxUsers;
5660 for (unsigned i = 0; i < cMaxUsers; i++)
5661 {
5662 paUsers[i].iNext = i + 1;
5663 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5664 paUsers[i].iUserTable = 0xfffffffe;
5665 }
5666 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5667
5668 /*
5669 * Clear all the GCPhys links and rebuild the phys ext free list.
5670 */
5671 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5672 pRam;
5673 pRam = pRam->CTX_SUFF(pNext))
5674 {
5675 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5676 while (iPage-- > 0)
5677 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5678 }
5679
5680 pPool->iPhysExtFreeHead = 0;
5681 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5682 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5683 for (unsigned i = 0; i < cMaxPhysExts; i++)
5684 {
5685 paPhysExts[i].iNext = i + 1;
5686 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5687 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5688 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5689 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5690 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5691 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5692 }
5693 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5694
5695 /*
5696 * Just zap the modified list.
5697 */
5698 pPool->cModifiedPages = 0;
5699 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5700
5701 /*
5702 * Clear the GCPhys hash and the age list.
5703 */
5704 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5705 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5706 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5707 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5708
5709# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5710 /* Clear all dirty pages. */
5711 pPool->idxFreeDirtyPage = 0;
5712 pPool->cDirtyPages = 0;
5713 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5714 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5715# endif
5716
5717 /*
5718 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5719 */
5720 VMCC_FOR_EACH_VMCPU(pVM)
5721 {
5722 /*
5723 * Re-enter the shadowing mode and assert Sync CR3 FF.
5724 */
5725 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5726 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5727 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5728 }
5729 VMCC_FOR_EACH_VMCPU_END(pVM);
5730
5731 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5732}
5733
5734#endif /* IN_RING3 */
5735
5736#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5737/**
5738 * Stringifies a PGMPOOLKIND value.
5739 */
5740static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5741{
5742 switch ((PGMPOOLKIND)enmKind)
5743 {
5744 case PGMPOOLKIND_INVALID:
5745 return "PGMPOOLKIND_INVALID";
5746 case PGMPOOLKIND_FREE:
5747 return "PGMPOOLKIND_FREE";
5748 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5749 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5750 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5751 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5752 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5753 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5754 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5755 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5756 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5757 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5758 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5759 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5760 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5761 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5762 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5763 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5764 case PGMPOOLKIND_32BIT_PD:
5765 return "PGMPOOLKIND_32BIT_PD";
5766 case PGMPOOLKIND_32BIT_PD_PHYS:
5767 return "PGMPOOLKIND_32BIT_PD_PHYS";
5768 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5769 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5770 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5771 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5772 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5773 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5774 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5775 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5776 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5777 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5778 case PGMPOOLKIND_PAE_PD_PHYS:
5779 return "PGMPOOLKIND_PAE_PD_PHYS";
5780 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5781 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5782 case PGMPOOLKIND_PAE_PDPT:
5783 return "PGMPOOLKIND_PAE_PDPT";
5784 case PGMPOOLKIND_PAE_PDPT_PHYS:
5785 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5786 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5787 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5788 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5789 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5790 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5791 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5792 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5793 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5794 case PGMPOOLKIND_64BIT_PML4:
5795 return "PGMPOOLKIND_64BIT_PML4";
5796 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5797 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5798 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5799 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5800 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5801 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5802 case PGMPOOLKIND_ROOT_NESTED:
5803 return "PGMPOOLKIND_ROOT_NESTED";
5804 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5805 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5806 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5807 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5808 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5809 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5810 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5811 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5812 }
5813 return "Unknown kind!";
5814}
5815#endif /* LOG_ENABLED || VBOX_STRICT */
5816
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette