VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp

Last change on this file was 106061, checked in by vboxsync on 2024-09-16

Copyright year updates by scm.

1/* $Id: PGMAllPool.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/vmm/hm_vmx.h>
42
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/asm.h>
46#include <iprt/asm-mem.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Internal Functions *
52*********************************************************************************************************************************/
53RT_C_DECLS_BEGIN
54#if 0 /* unused */
55DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
56DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
57#endif /* unused */
58static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
63static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
64#endif
65#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
66static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
67#endif
68
69int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
70PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
71void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
72void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
73
74RT_C_DECLS_END
75
76
77#if 0 /* unused */
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96#endif /* unused */
97
98
99/**
100 * Flushes a chain of pages sharing the same access monitor.
101 *
102 * @param pPool The pool.
103 * @param pPage A page in the chain.
104 */
105void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
106{
107 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
108
109 /*
110 * Find the list head.
111 */
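 /* Pages sharing one access monitor form a doubly linked list through iMonitoredPrev /
    iMonitoredNext (indices into pPool->aPages, NIL_PGMPOOL_IDX terminated), e.g.
    NIL <- head <-> page2 <-> page3 -> NIL.  Rewind to the head so the whole chain gets
    flushed below. */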
112 uint16_t idx = pPage->idx;
113 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
114 {
115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
116 {
117 idx = pPage->iMonitoredPrev;
118 Assert(idx != pPage->idx);
119 pPage = &pPool->aPages[idx];
120 }
121 }
122
123 /*
124 * Iterate the list flushing each shadow page.
125 */
126 for (;;)
127 {
128 idx = pPage->iMonitoredNext;
129 Assert(idx != pPage->idx);
130 if (pPage->idx >= PGMPOOL_IDX_FIRST)
131 {
132 int rc2 = pgmPoolFlushPage(pPool, pPage);
133 AssertRC(rc2);
134 }
135 /* next */
136 if (idx == NIL_PGMPOOL_IDX)
137 break;
138 pPage = &pPool->aPages[idx];
139 }
140}
141
142
143/**
144 * Wrapper for getting the current context pointer to the entry being modified.
145 *
146 * @returns VBox status code suitable for scheduling.
147 * @param pVM The cross context VM structure.
148 * @param pvDst Destination address
149 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
150 * on the context (e.g. \#PF in R0 & RC).
151 * @param GCPhysSrc The source guest physical address.
152 * @param cb Size of data to read
153 */
154DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
155{
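 /* Note: both paths below round the source address down with ~(cb - 1); this ASSUMES cb
    is a power of two (4 or 8 byte paging entries) and that guest entries are naturally
    aligned, so the whole entry is read even when the faulting write landed in the middle
    of it. */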
156#if defined(IN_RING3)
157 NOREF(pVM); NOREF(GCPhysSrc);
158 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
159 return VINF_SUCCESS;
160#else
161 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
162 NOREF(pvSrc);
163 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
164#endif
165}
166
167
168/**
169 * Process shadow entries before they are changed by the guest.
170 *
171 * For PT entries we will clear them. For PD entries, we'll simply check
172 * for mapping conflicts and set the SyncCR3 FF if found.
173 *
174 * @param pVCpu The cross context virtual CPU structure.
175 * @param pPool The pool.
176 * @param pPage The head page.
177 * @param GCPhysFault The guest physical fault address.
178 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
179 * depending on the context (e.g. \#PF in R0 & RC).
180 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
181 */
182static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
183 void const *pvAddress, unsigned cbWrite)
184{
185 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
186 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
187 PVMCC pVM = pPool->CTX_SUFF(pVM);
188 NOREF(pVCpu);
189
190 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
191 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
192
193 if (PGMPOOL_PAGE_IS_NESTED(pPage))
194 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
195
196 for (;;)
197 {
198 union
199 {
200 void *pv;
201 PX86PT pPT;
202 PPGMSHWPTPAE pPTPae;
203 PX86PD pPD;
204 PX86PDPAE pPDPae;
205 PX86PDPT pPDPT;
206 PX86PML4 pPML4;
207#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
208 PEPTPDPT pEptPdpt;
209 PEPTPD pEptPd;
210 PEPTPT pEptPt;
211#endif
212 } uShw;
213
214 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
215 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
216
217 uShw.pv = NULL;
218 switch (pPage->enmKind)
219 {
220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
221 {
222 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
223 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
224 const unsigned iShw = off / sizeof(X86PTE);
225 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
226 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
227 if (uPde & X86_PTE_P)
228 {
229 X86PTE GstPte;
230 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
231 AssertRC(rc);
232 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
233 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
234 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
235 }
236 break;
237 }
238
239 /* Half-page sized: a PAE PT shadows only half of a 32-bit guest PT. */
240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
241 {
242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
243 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
244 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
245 {
246 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
247 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
248 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
249 {
250 X86PTE GstPte;
251 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
252 AssertRC(rc);
253
254 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
255 pgmPoolTracDerefGCPhysHint(pPool, pPage,
256 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
257 GstPte.u & X86_PTE_PG_MASK,
258 iShw);
259 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
260 }
261 }
262 break;
263 }
264
265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
269 {
270 unsigned iGst = off / sizeof(X86PDE);
271 unsigned iShwPdpt = iGst / 256;
272 unsigned iShw = (iGst % 256) * 2;
273 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
274
275 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
277 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
278 {
279 for (unsigned i = 0; i < 2; i++)
280 {
281 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
282 if (uPde & X86_PDE_P)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
285 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
286 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
287 }
288
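 /* A write that starts misaligned and is long enough can spill into the following entry;
    the "paranoia" blocks here and in the other cases of this switch therefore compute a
    second index (iShw2) and clear that entry as well if it is present. */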
289 /* paranoia / a bit assumptive. */
290 if ( (off & 3)
291 && (off & 3) + cbWrite > 4)
292 {
293 const unsigned iShw2 = iShw + 2 + i;
294 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
295 {
296 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
297 if (uPde2 & X86_PDE_P)
298 {
299 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
300 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
301 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
302 }
303 }
304 }
305 }
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
311 {
312 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
313 const unsigned iShw = off / sizeof(X86PTEPAE);
314 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
315 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
316 {
317 X86PTEPAE GstPte;
318 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
319 AssertRC(rc);
320
321 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
324 GstPte.u & X86_PTE_PAE_PG_MASK,
325 iShw);
326 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 7)
331 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
332 {
333 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
334 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
335
336 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
337 {
338 X86PTEPAE GstPte;
339 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
340 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
341 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
342 AssertRC(rc);
343 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
344 pgmPoolTracDerefGCPhysHint(pPool, pPage,
345 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
346 GstPte.u & X86_PTE_PAE_PG_MASK,
347 iShw2);
348 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
349 }
350 }
351 break;
352 }
353
354 case PGMPOOLKIND_32BIT_PD:
355 {
356 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
357 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
358
359 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
360 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
361 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
362 if (uPde & X86_PDE_P)
363 {
364 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
365 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
366 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
367 }
368
369 /* paranoia / a bit assumptive. */
370 if ( (off & 3)
371 && (off & 3) + cbWrite > sizeof(X86PTE))
372 {
373 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
374 if ( iShw2 != iShw
375 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
376 {
377 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
378 if (uPde2 & X86_PDE_P)
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
381 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
382 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
383 }
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
387 if ( uShw.pPD->a[iShw].n.u1Present
388 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
391 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394#endif
395 break;
396 }
397
398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
399 {
400 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
401 const unsigned iShw = off / sizeof(X86PDEPAE);
402 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
403
404 /*
405 * Causes trouble when the guest uses a PDE to refer to the whole page table level
406 * structure. (Invalidate here; faults later on when it tries to change the page
407 * table entries -> recheck; probably only applies to the RC case.)
408 */
409 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
410 if (uPde & X86_PDE_P)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
413 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
414 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
415 }
416
417 /* paranoia / a bit assumptive. */
418 if ( (off & 7)
419 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
420 {
421 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
422 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
423
424 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
425 if (uPde2 & X86_PDE_P)
426 {
427 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
428 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
429 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
430 }
431 }
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PDPT:
436 {
437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
438 /*
439 * Hopefully this doesn't happen very often:
440 * - touching unused parts of the page
441 * - messing with the bits of pd pointers without changing the physical address
442 */
443 /* PDPT roots are not page aligned; 32 bytes only! */
444 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
445
446 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
447 const unsigned iShw = offPdpt / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
451 if (uPdpe & X86_PDPE_P)
452 {
453 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
454 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
455 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( (offPdpt & 7)
460 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
461 {
462 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
463 if ( iShw2 != iShw
464 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
465 {
466 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
467 if (uPdpe2 & X86_PDPE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
470 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
485 if (uPde & X86_PDE_P)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
488 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
489 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
490 }
491
492 /* paranoia / a bit assumptive. */
493 if ( (off & 7)
494 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
495 {
496 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
497 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
498 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
499 if (uPde2 & X86_PDE_P)
500 {
501 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
502 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - messing with the bits of pd pointers without changing the physical address
515 */
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PDPE);
518 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
519 if (uPdpe & X86_PDPE_P)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
522 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
523 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
524 }
525 /* paranoia / a bit assumptive. */
526 if ( (off & 7)
527 && (off & 7) + cbWrite > sizeof(X86PDPE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
530 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
531 if (uPdpe2 & X86_PDPE_P)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
534 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
536 }
537 }
538 break;
539 }
540
541 case PGMPOOLKIND_64BIT_PML4:
542 {
543 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
544 /*
545 * Hopefully this doesn't happen very often:
546 * - messing with the bits of pd pointers without changing the physical address
547 */
548 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
549 const unsigned iShw = off / sizeof(X86PDPE);
550 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
551 if (uPml4e & X86_PML4E_P)
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
554 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
555 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
556 }
557 /* paranoia / a bit assumptive. */
558 if ( (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
562 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
563 if (uPml4e2 & X86_PML4E_P)
564 {
565 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
566 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
567 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
568 }
569 }
570 break;
571 }
572
573#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
574 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
575 {
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(EPTPML4E);
578 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
579 if (uPml4e & EPT_PRESENT_MASK)
580 {
581 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
582 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
583 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
584 }
585
586 /* paranoia / a bit assumptive. */
587 if ( (off & 7)
588 && (off & 7) + cbWrite > sizeof(X86PML4E))
589 {
590 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
591 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
592 if (uPml4e2 & EPT_PRESENT_MASK)
593 {
594 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
595 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
596 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
597 }
598 }
599 break;
600 }
601
602 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
603 {
604 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
605 const unsigned iShw = off / sizeof(EPTPDPTE);
606 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
607 if (uPdpte & EPT_PRESENT_MASK)
608 {
609 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
610 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
611 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
612 }
613
614 /* paranoia / a bit assumptive. */
615 if ( (off & 7)
616 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
617 {
618 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
619 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
620 if (uPdpte2 & EPT_PRESENT_MASK)
621 {
622 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
623 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
624 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
625 }
626 }
627 break;
628 }
629
630 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
633 const unsigned iShw = off / sizeof(EPTPDE);
634 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
635 if (uPde & EPT_PRESENT_MASK)
636 {
637 Assert(!(uPde & EPT_E_LEAF));
638 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
639 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
640 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
641 }
642
643 /* paranoia / a bit assumptive. */
644 if ( (off & 7)
645 && (off & 7) + cbWrite > sizeof(EPTPDE))
646 {
647 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
648 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
649 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
650 if (uPde2 & EPT_PRESENT_MASK)
651 {
652 Assert(!(uPde2 & EPT_E_LEAF));
653 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
654 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
655 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
656 }
657 }
658 break;
659 }
660
661 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
662 {
663 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
664 const unsigned iShw = off / sizeof(EPTPTE);
665 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
666 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
667 if (uPte & EPT_PRESENT_MASK)
668 {
669 EPTPTE GstPte;
670 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
671 AssertRC(rc);
672
673 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
674 pgmPoolTracDerefGCPhysHint(pPool, pPage,
675 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
676 GstPte.u & EPT_PTE_PG_MASK,
677 iShw);
678 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
679 }
680
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(EPTPTE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
686 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
687 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
688 if (uPte2 & EPT_PRESENT_MASK)
689 {
690 EPTPTE GstPte;
691 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
692 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
693 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
694 AssertRC(rc);
695 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
696 pgmPoolTracDerefGCPhysHint(pPool, pPage,
697 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
698 GstPte.u & EPT_PTE_PG_MASK,
699 iShw2);
700 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
701 }
702 }
703 break;
704 }
705#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
706
707 default:
708 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
709 }
710 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
711
712 /* next */
713 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
714 return;
715 pPage = &pPool->aPages[pPage->iMonitoredNext];
716 }
717}
718
719#ifndef IN_RING3
720
721/**
722 * Checks if an access could be a fork operation in progress.
723 *
724 * Meaning that the guest is setting up the parent process for Copy-On-Write.
725 *
726 * @returns true if it's likely that we're forking, otherwise false.
727 * @param pPool The pool.
728 * @param pDis The disassembled instruction.
729 * @param offFault The access offset.
730 */
731DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISSTATE pDis, unsigned offFault)
732{
733 /*
734 * i386 linux is using btr to clear X86_PTE_RW.
735 * The functions involved are (2.6.16 source inspection):
736 * clear_bit
737 * ptep_set_wrprotect
738 * copy_one_pte
739 * copy_pte_range
740 * copy_pmd_range
741 * copy_pud_range
742 * copy_page_range
743 * dup_mmap
744 * dup_mm
745 * copy_mm
746 * copy_process
747 * do_fork
748 */
749 if ( pDis->pCurInstr->uOpcode == OP_BTR
750 && !(offFault & 4)
751 /** @todo Validate that the bit index is X86_PTE_RW. */
752 )
753 {
754 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
755 return true;
756 }
757 return false;
758}
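/* When this returns true the caller skips interpretation and simply flushes the monitored
   chain: a forking guest write-protects a large number of PTEs in one go, so emulating
   each btr individually would cost far more than a single flush. */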
759
760
761/**
762 * Determine whether the page is likely to have been reused.
763 *
764 * @returns true if we consider the page as being reused for a different purpose.
765 * @returns false if we consider it to still be a paging page.
766 * @param pVM The cross context VM structure.
767 * @param pVCpu The cross context virtual CPU structure.
768 * @param pCtx Pointer to the register context for the CPU.
769 * @param pDis The disassembly info for the faulting instruction.
770 * @param pvFault The fault address.
771 * @param pPage The pool page being accessed.
772 *
773 * @remark The REP prefix check is left to the caller because of STOSD/W.
774 */
775DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISSTATE pDis, RTGCPTR pvFault,
776 PPGMPOOLPAGE pPage)
777{
778 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
779 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
780 if (pPage->cLocked)
781 {
782 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused because it's locked!\n", pvFault, pPage));
783 return false;
784 }
785
786 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
787 if ( HMHasPendingIrq(pVM)
788 && pCtx->rsp - pvFault < 32)
789 {
790 /* Fault caused by stack writes while trying to inject an interrupt event. */
791 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
792 return true;
793 }
794
795 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->aParams[0].fUse, pDis->aParams[0].x86.Base.idxGenReg));
796
797 /* Non-supervisor mode write means it's used for something else. */
798 if (CPUMGetGuestCPL(pVCpu) == 3)
799 return true;
800
801 switch (pDis->pCurInstr->uOpcode)
802 {
803 /* call implies the actual push of the return address faulted */
804 case OP_CALL:
805 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
806 return true;
807 case OP_PUSH:
808 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
809 return true;
810 case OP_PUSHF:
811 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
812 return true;
813 case OP_PUSHA:
814 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
815 return true;
816 case OP_FXSAVE:
817 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
818 return true;
819 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
820 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
821 return true;
822 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
823 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
824 return true;
825 case OP_MOVSWD:
826 case OP_STOSWD:
827 if ( pDis->x86.fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
828 && pCtx->rcx >= 0x40
829 )
830 {
831 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
832
833 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
834 return true;
835 }
836 break;
837
838 default:
839 /*
840 * Anything having ESP on the left side means stack writes.
841 */
842 if ( ( (pDis->aParams[0].fUse & DISUSE_REG_GEN32)
843 || (pDis->aParams[0].fUse & DISUSE_REG_GEN64))
844 && (pDis->aParams[0].x86.Base.idxGenReg == DISGREG_ESP))
845 {
846 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
847 return true;
848 }
849 break;
850 }
851
852 /*
853 * Page table updates are very unlikely to cross page boundaries,
854 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
855 */
856 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->aParams[0]);
857 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
858 {
859 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
860 return true;
861 }
862
863 /*
864 * Nobody does an unaligned 8-byte write to a page table, right?
865 */
866 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
867 {
868 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
869 return true;
870 }
871
872 return false;
873}
874
875
876/**
877 * Flushes the page being accessed.
878 *
879 * @returns VBox status code suitable for scheduling.
880 * @param pVM The cross context VM structure.
881 * @param pVCpu The cross context virtual CPU structure.
882 * @param pPool The pool.
883 * @param pPage The pool page (head).
884 * @param pDis The disassembly of the write instruction.
885 * @param pCtx Pointer to the register context for the CPU.
886 * @param GCPhysFault The fault address as guest physical address.
887 * @todo VBOXSTRICTRC
888 */
889static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
890 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
891{
892 NOREF(pVM); NOREF(GCPhysFault);
893
894 /*
895 * First, do the flushing.
896 */
897 pgmPoolMonitorChainFlush(pPool, pPage);
898
899 /*
900 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
901 * Must do this in raw mode (!); XP boot will fail otherwise.
902 */
903 int rc = VINF_SUCCESS;
904 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
905 if (rc2 == VINF_SUCCESS)
906 { /* do nothing */ }
907 else if (rc2 == VINF_EM_RESCHEDULE)
908 {
909 rc = VBOXSTRICTRC_VAL(rc2);
910# ifndef IN_RING3
911 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
912# endif
913 }
914 else if (rc2 == VERR_EM_INTERPRETER)
915 {
916 rc = VINF_EM_RAW_EMULATE_INSTR;
917 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
918 }
919 else if (RT_FAILURE_NP(rc2))
920 rc = VBOXSTRICTRC_VAL(rc2);
921 else
922 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
923
924 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
925 return rc;
926}
927
928
929/**
930 * Handles the STOSD write accesses.
931 *
932 * @returns VBox status code suitable for scheduling.
933 * @param pVM The cross context VM structure.
934 * @param pPool The pool.
935 * @param pPage The pool page (head).
936 * @param pDis The disassembly of the write instruction.
937 * @param pCtx Pointer to the register context for the CPU.
938 * @param GCPhysFault The fault address as guest physical address.
939 * @param pvFault The fault address.
940 */
941DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
942 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
943{
944 unsigned uIncrement = pDis->aParams[0].x86.cb;
945 NOREF(pVM);
946
947 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
948 Assert(pCtx->rcx <= 0x20);
949
950# ifdef VBOX_STRICT
951 if (pDis->x86.uOpMode == DISCPUMODE_32BIT)
952 Assert(uIncrement == 4);
953 else
954 Assert(uIncrement == 8);
955# endif
956
957 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
958
959 /*
960 * Increment the modification counter and insert it into the list
961 * of modified pages the first time.
962 */
963 if (!pPage->cModifications++)
964 pgmPoolMonitorModifiedInsert(pPool, pPage);
965
966 /*
967 * Execute REP STOSD.
968 *
969 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
970 * write situation, meaning that it's safe to write here.
971 */
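 /* Each iteration below first lets pgmPoolMonitorChainChanging drop the affected shadow
    entry, then performs the guest's store via PGMPhysSimpleWriteGCPhys, and advances
    rdi/rcx by uIncrement; rip is stepped past the instruction once rcx reaches zero. */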
972 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
973 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
974 while (pCtx->rcx)
975 {
976 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
977 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
978 pu32 += uIncrement;
979 GCPhysFault += uIncrement;
980 pCtx->rdi += uIncrement;
981 pCtx->rcx--;
982 }
983 pCtx->rip += pDis->cbInstr;
984
985 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
986 return VINF_SUCCESS;
987}
988
989
990/**
991 * Handles the simple write accesses.
992 *
993 * @returns VBox status code suitable for scheduling.
994 * @param pVM The cross context VM structure.
995 * @param pVCpu The cross context virtual CPU structure.
996 * @param pPool The pool.
997 * @param pPage The pool page (head).
998 * @param pDis The disassembly of the write instruction.
999 * @param pCtx Pointer to the register context for the CPU.
1000 * @param GCPhysFault The fault address as guest physical address.
1001 * @param pfReused Reused state (in/out)
1002 */
1003DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISSTATE pDis,
1004 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1005{
1006 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1007 NOREF(pVM);
1008 NOREF(pfReused); /* initialized by caller */
1009
1010 /*
1011 * Increment the modification counter and insert it into the list
1012 * of modified pages the first time.
1013 */
1014 if (!pPage->cModifications++)
1015 pgmPoolMonitorModifiedInsert(pPool, pPage);
1016
1017 /*
1018 * Clear all the pages.
1019 */
1020 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->aParams[0]);
1021 if (cbWrite <= 8)
1022 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1023 else if (cbWrite <= 16)
1024 {
1025 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1027 }
1028 else
1029 {
1030 Assert(cbWrite <= 32);
1031 for (uint32_t off = 0; off < cbWrite; off += 8)
1032 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1033 }
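 /* Splitting the write into chunks of at most 8 bytes keeps each
    pgmPoolMonitorChainChanging call within one 64-bit entry plus a possible spill into
    its neighbour, which is all that function is prepared to handle. */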
1034
1035 /*
1036 * Interpret the instruction.
1037 */
1038 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1039 if (RT_SUCCESS(rc))
1040 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1041 else if (rc == VERR_EM_INTERPRETER)
1042 {
1043 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1044 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1045 rc = VINF_EM_RAW_EMULATE_INSTR;
1046 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1047 }
1048
1049# if 0 /* experimental code */
1050 if (rc == VINF_SUCCESS)
1051 {
1052 switch (pPage->enmKind)
1053 {
1054 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1055 {
1056 X86PTEPAE GstPte;
1057 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1058 AssertRC(rc);
1059
1060 /* Check the new value written by the guest. If present and with a bogus physical address, then
1061 * it's fairly safe to assume the guest is reusing the PT.
1062 */
1063 if (GstPte.n.u1Present)
1064 {
1065 RTHCPHYS HCPhys = -1;
1066 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1067 if (rc != VINF_SUCCESS)
1068 {
1069 *pfReused = true;
1070 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1071 }
1072 }
1073 break;
1074 }
1075 }
1076 }
1077# endif
1078
1079 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1080 return VBOXSTRICTRC_VAL(rc);
1081}
1082
1083
1084/**
1085 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1086 * \#PF access handler callback for page table pages.}
1087 *
1088 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1089 */
1090DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
1091 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1092{
1093 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1094 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1095 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1096 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1097 unsigned cMaxModifications;
1098 bool fForcedFlush = false;
1099 RT_NOREF_PV(uErrorCode);
1100
1101# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1102 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1103 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1104# endif
1105 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1106
1107 PGM_LOCK_VOID(pVM);
1108 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1109 {
1110 /* Pool page changed while we were waiting for the lock; ignore. */
1111 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1112 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1113 PGM_UNLOCK(pVM);
1114 return VINF_SUCCESS;
1115 }
1116# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1117 if (pPage->fDirty)
1118 {
1119# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1120 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1121# endif
1122 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1123 PGM_UNLOCK(pVM);
1124 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1125 }
1126# endif
1127
1128# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1129 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1130 {
1131 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1132 void *pvGst;
1133 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1134 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1135 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1136 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1137 }
1138# endif
1139
1140# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1141 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1142 {
1143 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1144 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1145 pgmPoolMonitorChainFlush(pPool, pPage);
1146 PGM_UNLOCK(pVM);
1147 return VINF_SUCCESS;
1148 }
1149# endif
1150
1151 /*
1152 * Disassemble the faulting instruction.
1153 */
1154 PDISSTATE pDis = &pVCpu->pgm.s.Dis;
1155 int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
1156 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1157 {
1158 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1159 PGM_UNLOCK(pVM);
1160 return rc;
1161 }
1162
1163 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1164
1165 /*
1166 * We should ALWAYS have the list head as user parameter. This
1167 * is because we use that page to record the changes.
1168 */
1169 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1170
1171# ifdef IN_RING0
1172 /* Maximum nr of modifications depends on the page type. */
1173 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1174 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1175 cMaxModifications = 4;
1176 else
1177 cMaxModifications = 24;
1178# else
1179 cMaxModifications = 48;
1180# endif
1181
1182 /*
1183 * Incremental page table updates should weigh more than random ones.
1184 * (Only applies when started from offset 0)
1185 */
1186 pVCpu->pgm.s.cPoolAccessHandler++;
1187 if ( pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40 /* observed loops in Windows 7 x64 */
1188 && pPage->GCPtrLastAccessHandlerRip < pCtx->rip + 0x40
1189 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->aParams[0].x86.cb)
1190 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1191 {
1192 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1193 Assert(pPage->cModifications < 32000);
1194 pPage->cModifications = pPage->cModifications * 2;
1195 pPage->GCPtrLastAccessHandlerFault = pvFault;
1196 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1197 if (pPage->cModifications >= cMaxModifications)
1198 {
1199 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
1200 fForcedFlush = true;
1201 }
1202 }
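 /* Doubling cModifications for such sequential faults makes a linear rewrite of a table
    reach cMaxModifications quickly and take the forced-flush path below
    (fForcedFlush / StatMonitorPfRZFlushReinit) instead of monitoring every single store. */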
1203
1204 if (pPage->cModifications >= cMaxModifications)
1205 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1206
1207 /*
1208 * Check if it's worth dealing with.
1209 */
1210 bool fReused = false;
1211 bool fNotReusedNotForking = false;
1212 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1213 || pgmPoolIsPageLocked(pPage)
1214 )
1215 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
1216 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1217 {
1218 /*
1219 * Simple instructions, no REP prefix.
1220 */
1221 if (!(pDis->x86.fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1222 {
1223 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
1224 if (fReused)
1225 goto flushPage;
1226
1227 /* A mov instruction to change the first page table entry will be remembered so we can detect
1228 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1229 */
1230 if ( rc == VINF_SUCCESS
1231 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1232 && pDis->pCurInstr->uOpcode == OP_MOV
1233 && (pvFault & PAGE_OFFSET_MASK) == 0)
1234 {
1235 pPage->GCPtrLastAccessHandlerFault = pvFault;
1236 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1237 pPage->GCPtrLastAccessHandlerRip = pCtx->rip;
1238 /* Make sure we don't kick out a page too quickly. */
1239 if (pPage->cModifications > 8)
1240 pPage->cModifications = 2;
1241 }
1242 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1243 {
1244 /* ignore the 2nd write to this page table entry. */
1245 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1246 }
1247 else
1248 {
1249 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1250 pPage->GCPtrLastAccessHandlerRip = 0;
1251 }
1252
1253 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1254 PGM_UNLOCK(pVM);
1255 return rc;
1256 }
1257
1258 /*
1259 * Windows is frequently doing small memset() operations (netio test 4k+).
1260 * We have to deal with these or we'll kill the cache and performance.
1261 */
1262 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1263 && !pCtx->eflags.Bits.u1DF
1264 && pDis->x86.uOpMode == pDis->uCpuMode
1265 && pDis->x86.uAddrMode == pDis->uCpuMode)
1266 {
1267 bool fValidStosd = false;
1268
1269 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1270 && pDis->x86.fPrefix == DISPREFIX_REP
1271 && pCtx->ecx <= 0x20
1272 && pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1273 && !((uintptr_t)pvFault & 3)
1274 && (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
1275 )
1276 {
1277 fValidStosd = true;
1278 pCtx->rcx &= 0xffffffff; /* paranoia */
1279 }
1280 else
1281 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1282 && pDis->x86.fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1283 && pCtx->rcx <= 0x20
1284 && pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1285 && !((uintptr_t)pvFault & 7)
1286 && (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
1287 )
1288 {
1289 fValidStosd = true;
1290 }
1291
1292 if (fValidStosd)
1293 {
1294 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
1295 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1296 PGM_UNLOCK(pVM);
1297 return rc;
1298 }
1299 }
1300
1301 /* REP prefix, don't bother. */
1302 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1303 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1304 pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->x86.fPrefix));
1305 fNotReusedNotForking = true;
1306 }
1307
1308# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1309 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1310 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1311 */
1312 if ( pPage->cModifications >= cMaxModifications
1313 && !fForcedFlush
1314 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1315 && ( fNotReusedNotForking
1316 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
1317 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1318 )
1319 )
1320 {
1321 Assert(!pgmPoolIsPageLocked(pPage));
1322 Assert(pPage->fDirty == false);
1323
1324 /* Flush any monitored duplicates as we will disable write protection. */
1325 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1326 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1327 {
1328 PPGMPOOLPAGE pPageHead = pPage;
1329
1330 /* Find the monitor head. */
1331 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1332 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1333
1334 while (pPageHead)
1335 {
1336 unsigned idxNext = pPageHead->iMonitoredNext;
1337
1338 if (pPageHead != pPage)
1339 {
1340 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1341 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1342 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1343 AssertRC(rc2);
1344 }
1345
1346 if (idxNext == NIL_PGMPOOL_IDX)
1347 break;
1348
1349 pPageHead = &pPool->aPages[idxNext];
1350 }
1351 }
1352
1353 /* The flushing above might fail for locked pages, so double check. */
1354 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1355 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1356 {
1357 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1358
1359 /* Temporarily allow write access to the page table again. */
1360 rc = PGMHandlerPhysicalPageTempOff(pVM,
1361 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1362 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1363 if (rc == VINF_SUCCESS)
1364 {
1365 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1366 AssertMsg(rc == VINF_SUCCESS
1367 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1368 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1369 || rc == VERR_PAGE_NOT_PRESENT,
1370 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1371# ifdef VBOX_STRICT
1372 pPage->GCPtrDirtyFault = pvFault;
1373# endif
1374
1375 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1376 PGM_UNLOCK(pVM);
1377 return rc;
1378 }
1379 }
1380 }
1381# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1382
1383 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1384flushPage:
1385 /*
1386 * Not worth it, so flush it.
1387 *
1388 * If we considered it to be reused, don't go back to ring-3
1389 * to emulate failed instructions since we usually cannot
1390 * interpret them. This may be a bit risky, in which case
1391 * the reuse detection must be fixed.
1392 */
1393 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
1394 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1395 && fReused)
1396 {
1397 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1398 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1399 if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
1400 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1401 }
1402 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1403 PGM_UNLOCK(pVM);
1404 return rc;
1405}
1406
1407#endif /* !IN_RING3 */
1408
1409/**
1410 * @callback_method_impl{FNPGMPHYSHANDLER,
1411 * Access handler for shadowed page table pages.}
1412 *
1413 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1414 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1415 */
1416DECLCALLBACK(VBOXSTRICTRC)
1417pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1418 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1419{
1420 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1421 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1422 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1423 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1424 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1425 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1426
1427 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1428
1429 PGM_LOCK_VOID(pVM);
1430
1431#ifdef VBOX_WITH_STATISTICS
1432 /*
1433 * Collect stats on the access.
1434 */
1435 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1436 if (cbBuf <= 16 && cbBuf > 0)
1437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1438 else if (cbBuf >= 17 && cbBuf < 32)
1439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1440 else if (cbBuf >= 32 && cbBuf < 64)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1442 else if (cbBuf >= 64)
1443 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1444
1445 uint8_t cbAlign;
1446 switch (pPage->enmKind)
1447 {
1448 default:
1449 cbAlign = 7;
1450 break;
1451 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1452 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1454 case PGMPOOLKIND_32BIT_PD:
1455 case PGMPOOLKIND_32BIT_PD_PHYS:
1456 cbAlign = 3;
1457 break;
1458 }
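 /* cbAlign is the natural-alignment mask for the page's entry size: 3 for kinds with
    32-bit entries, 7 for everything else; any of these bits set in GCPhys means the write
    is not entry aligned and the matching Misaligned bucket is bumped below. */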
1459 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1460 if ((uint8_t)GCPhys & cbAlign)
1461 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1462#endif
1463
1464 /*
1465 * Make sure the pool page wasn't modified by a different CPU.
1466 */
1467 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1468 {
1469 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1470
1471 /* The max modification count before flushing depends on the context and page type. */
1472#ifdef IN_RING3
1473 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1474#else
1475 uint16_t cMaxModifications;
1476 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1477 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1478 cMaxModifications = 4;
1479 else
1480 cMaxModifications = 24;
1481#endif
1482
1483 /*
1484 * We don't have to be very sophisticated about this since there are relatively few calls here.
1485 * However, we must try our best to detect any non-CPU accesses (disk / networking).
1486 */
1487 if ( ( pPage->cModifications < cMaxModifications
1488 || pgmPoolIsPageLocked(pPage) )
1489 && enmOrigin != PGMACCESSORIGIN_DEVICE
1490 && cbBuf <= 16)
1491 {
1492 /* Clear the shadow entry. */
1493 if (!pPage->cModifications++)
1494 pgmPoolMonitorModifiedInsert(pPool, pPage);
1495
1496 if (cbBuf <= 8)
1497 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1498 else
1499 {
1500 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1501 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1502 }
1503 }
1504 else
1505 pgmPoolMonitorChainFlush(pPool, pPage);
1506
1507 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1508 }
1509 else
1510 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1511 PGM_UNLOCK(pVM);
1512 return VINF_PGM_HANDLER_DO_DEFAULT;
1513}
1514
1515
1516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1517
1518# if defined(VBOX_STRICT) && !defined(IN_RING3)
1519
1520/**
1521 * Check references to guest physical memory in a PAE / PAE page table.
1522 *
1523 * @param pPool The pool.
1524 * @param pPage The page.
1525 * @param pShwPT The shadow page table (mapping of the page).
1526 * @param pGstPT The guest page table.
1527 */
1528static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1529{
1530 unsigned cErrors = 0;
1531 int LastRc = -1; /* initialized to shut up gcc */
1532 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1533 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1534 PVMCC pVM = pPool->CTX_SUFF(pVM);
1535
1536# ifdef VBOX_STRICT
1537 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1538 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1539# endif
1540 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1541 {
1542 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1543 {
1544 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1545 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1546 if ( rc != VINF_SUCCESS
1547 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1548 {
1549 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1550 LastPTE = i;
1551 LastRc = rc;
1552 LastHCPhys = HCPhys;
1553 cErrors++;
1554
1555 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1556 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1557 AssertRC(rc);
1558
1559 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1560 {
1561 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1562
1563 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1564 {
1565 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1566
1567 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1568 {
1569 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1570 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1571 {
1572 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1573 }
1574 }
1575
1576 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1577 }
1578 }
1579 }
1580 }
1581 }
1582 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1583}
1584
1585
1586/**
1587 * Check references to guest physical memory in a PAE / 32-bit page table.
1588 *
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 * @param pShwPT The shadow page table (mapping of the page).
1592 * @param pGstPT The guest page table.
1593 */
1594static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1595{
1596 unsigned cErrors = 0;
1597 int LastRc = -1; /* initialized to shut up gcc */
1598 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1599 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1600 PVMCC pVM = pPool->CTX_SUFF(pVM);
1601
1602# ifdef VBOX_STRICT
1603 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1604 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1605# endif
1606 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1607 {
1608 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1609 {
1610 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1611 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1612 if ( rc != VINF_SUCCESS
1613 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1614 {
1615 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1616 LastPTE = i;
1617 LastRc = rc;
1618 LastHCPhys = HCPhys;
1619 cErrors++;
1620
1621 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1622 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1623 AssertRC(rc);
1624
1625 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1626 {
1627 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1628
1629 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1630 {
1631 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1632
1633 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1634 {
1635 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1636 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1637 {
1638 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1639 }
1640 }
1641
1642 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1643 }
1644 }
1645 }
1646 }
1647 }
1648 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1649}
1650
1651# endif /* VBOX_STRICT && !IN_RING3 */
1652
1653/**
1654 * Clear references to guest physical memory in a PAE / PAE page table.
1655 *
1656 * @returns The number of changed PTEs.
1657 * @param pPool The pool.
1658 * @param pPage The page.
1659 * @param pShwPT The shadow page table (mapping of the page).
1660 * @param pGstPT The guest page table.
1661 * @param pOldGstPT The old cached guest page table.
1662 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1663 * @param pfFlush Flush reused page table (out)
1664 */
1665DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1666 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1667{
1668 unsigned cChanged = 0;
1669
1670# ifdef VBOX_STRICT
1671 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1672 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1673# endif
1674 *pfFlush = false;
1675
1676 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1677 {
1678 /* Check the new value written by the guest. If present and with a bogus physical address, then
1679 * it's fairly safe to assume the guest is reusing the PT.
1680 */
1681 if ( fAllowRemoval
1682 && (pGstPT->a[i].u & X86_PTE_P))
1683 {
1684 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1685 {
1686 *pfFlush = true;
1687 return ++cChanged;
1688 }
1689 }
1690 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1691 {
1692 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1693 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1694 {
1695# ifdef VBOX_STRICT
1696 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1697 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1698 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1699# endif
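                /* The page frame is unchanged; also require matching attributes before skipping
                   the entry. A read-only shadow entry for a writable guest entry is fine
                   (fHostRW <= fGuestRW) and is not counted as a change. */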
1700 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1701 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1702 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1703 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1704
1705 if ( uHostAttr == uGuestAttr
1706 && fHostRW <= fGuestRW)
1707 continue;
1708 }
1709 cChanged++;
1710 /* Something was changed, so flush it. */
1711 Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1712 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1713 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1714 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1715 }
1716 }
1717 return cChanged;
1718}
1719
1720
1721/**
1722 * Clear references to guest physical memory in a PAE / 32-bit page table.
1723 *
1724 * @returns The number of changed PTEs.
1725 * @param pPool The pool.
1726 * @param pPage The page.
1727 * @param pShwPT The shadow page table (mapping of the page).
1728 * @param pGstPT The guest page table.
1729 * @param pOldGstPT The old cached guest page table.
1730 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1731 * @param pfFlush Flush reused page table (out)
1732 */
1733DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1734 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1735{
1736 unsigned cChanged = 0;
1737
1738# ifdef VBOX_STRICT
1739 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1740 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1741# endif
1742 *pfFlush = false;
1743
1744 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1745 {
1746 /* Check the new value written by the guest. If present and with a bogus physical address, then
1747 * it's fairly safe to assume the guest is reusing the PT. */
1748 if (fAllowRemoval)
1749 {
1750 X86PGUINT const uPte = pGstPT->a[i].u;
1751 if ( (uPte & X86_PTE_P)
1752 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1753 {
1754 *pfFlush = true;
1755 return ++cChanged;
1756 }
1757 }
1758 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1759 {
1760 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1761 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1762 {
1763# ifdef VBOX_STRICT
1764 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1765 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1766 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1767# endif
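                /* Page frame unchanged; also require matching attributes. A read-only shadow
                   entry for a writable guest entry (fHostRW <= fGuestRW) is acceptable. */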
1768 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1769 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1770 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1771 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1772
1773 if ( uHostAttr == uGuestAttr
1774 && fHostRW <= fGuestRW)
1775 continue;
1776 }
1777 cChanged++;
1778 /* Something was changed, so flush it. */
1779 Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1780 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1781 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1782 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1783 }
1784 }
1785 return cChanged;
1786}
1787
1788
1789/**
1790 * Flushes a dirty page.
1791 *
1792 * @param pVM The cross context VM structure.
1793 * @param pPool The pool.
1794 * @param idxSlot Dirty array slot index
1795 * @param fAllowRemoval Allow a reused page table to be removed
1796 */
1797static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1798{
1799 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1800
1801 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1802 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1803 if (idxPage == NIL_PGMPOOL_IDX)
1804 return;
1805
1806 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1807 Assert(pPage->idx == idxPage);
1808 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1809
1810 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1811 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1812
1813 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1814 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1815 Assert(rc == VINF_SUCCESS);
1816 pPage->fDirty = false;
1817
1818# ifdef VBOX_STRICT
1819 uint64_t fFlags = 0;
1820 RTHCPHYS HCPhys;
1821 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1822 AssertMsg( ( rc == VINF_SUCCESS
1823 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1824 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1825 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1826 || rc == VERR_PAGE_NOT_PRESENT,
1827 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1828# endif
1829
1830 /* Flush those PTEs that have changed. */
1831 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1832 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1833 void *pvGst;
1834 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1835 bool fFlush;
1836 unsigned cChanges;
1837
1838 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1839 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1840 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1841 else
1842 {
1843 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1844 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1845 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1846 }
1847
1848 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1849 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1850 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1851 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1852
1853 /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1854 Assert(pPage->cModifications);
1855 if (cChanges < 4)
1856 pPage->cModifications = 1; /* must use > 0 here */
1857 else
1858 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1859
1860 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
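    /* If the dirty array was full, the slot we just emptied becomes the next free one. */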
1861 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1862 pPool->idxFreeDirtyPage = idxSlot;
1863
1864 pPool->cDirtyPages--;
1865 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1866 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1867 if (fFlush)
1868 {
1869 Assert(fAllowRemoval);
1870 Log(("Flush reused page table!\n"));
1871 pgmPoolFlushPage(pPool, pPage);
1872 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1873 }
1874 else
1875 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1876}
1877
1878
1879# ifndef IN_RING3
1880/**
1881 * Adds a new dirty page.
1882 *
1883 * @param pVM The cross context VM structure.
1884 * @param pPool The pool.
1885 * @param pPage The page.
1886 */
1887void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1888{
1889 PGM_LOCK_ASSERT_OWNER(pVM);
1890 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1891 Assert(!pPage->fDirty);
1892 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1893
1894 unsigned idxFree = pPool->idxFreeDirtyPage;
1895 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1896 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1897
1898 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1899 {
1900 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1901 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1902 }
1903 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1904 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1905
1906 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1907
1908 /*
1909 * Make a copy of the guest page table as we require valid GCPhys addresses
1910 * when removing references to physical pages.
1911 * (The HCPhys linear lookup is *extremely* expensive!)
1912 */
1913 void *pvGst;
1914 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
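    /* Note: a PAE shadow PT for a 32-bit guest PT only covers 2 MiB, i.e. half of the 4 KiB
       guest page table, so copying PAGE_SIZE / 2 suffices for that kind. */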
1915 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1916 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1917# ifdef VBOX_STRICT
1918 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1919 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1920 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1921 else
1922 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1924# endif
1925 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1926
1927 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1928 pPage->fDirty = true;
1929 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1930 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1931 pPool->cDirtyPages++;
1932
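    /* Advance the free slot hint; if that slot is still occupied (and the array isn't full),
       scan for the next free one. */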
1933 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1934 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1935 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1936 {
1937 unsigned i;
1938 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1939 {
1940 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1941 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1942 {
1943 pPool->idxFreeDirtyPage = idxFree;
1944 break;
1945 }
1946 }
1947 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1948 }
1949
1950 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1951
1952 /*
1953 * Clear all references to this shadow table. See @bugref{7298}.
1954 */
1955 pgmPoolTrackClearPageUsers(pPool, pPage);
1956}
1957# endif /* !IN_RING3 */
1958
1959
1960/**
1961 * Check if the specified page is dirty (not write monitored)
1962 *
1963 * @return dirty or not
1964 * @param pVM The cross context VM structure.
1965 * @param GCPhys Guest physical address
1966 */
1967bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1968{
1969 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1970 PGM_LOCK_ASSERT_OWNER(pVM);
1971 if (!pPool->cDirtyPages)
1972 return false;
1973
1974 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1975
1976 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1977 {
1978 unsigned idxPage = pPool->aidxDirtyPages[i];
1979 if (idxPage != NIL_PGMPOOL_IDX)
1980 {
1981 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1982 if (pPage->GCPhys == GCPhys)
1983 return true;
1984 }
1985 }
1986 return false;
1987}
1988
1989
1990/**
1991 * Reset all dirty pages by reinstating page monitoring.
1992 *
1993 * @param pVM The cross context VM structure.
1994 */
1995void pgmPoolResetDirtyPages(PVMCC pVM)
1996{
1997 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1998 PGM_LOCK_ASSERT_OWNER(pVM);
1999 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2000
2001 if (!pPool->cDirtyPages)
2002 return;
2003
2004 Log(("pgmPoolResetDirtyPages\n"));
2005 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2006 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2007
2008 pPool->idxFreeDirtyPage = 0;
2009 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2010 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2011 {
2012 unsigned i;
2013 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2014 {
2015 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2016 {
2017 pPool->idxFreeDirtyPage = i;
2018 break;
2019 }
2020 }
2021 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2022 }
2023
2024 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2025 return;
2026}
2027
2028
2029/**
2030 * Invalidate the PT entry for the specified page
2031 *
2032 * @param pVM The cross context VM structure.
2033 * @param GCPtrPage Guest page to invalidate
2034 */
2035void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2036{
2037 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2038 PGM_LOCK_ASSERT_OWNER(pVM);
2039 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2040
2041 if (!pPool->cDirtyPages)
2042 return;
2043
2044 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2045 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2046 {
2047 /** @todo What was intended here??? This looks incomplete... */
2048 }
2049}
2050
2051
2052/**
2053 * Flushes the dirty page matching the given guest page table address, reinstating its page monitoring.
2054 *
2055 * @param pVM The cross context VM structure.
2056 * @param GCPhysPT Physical address of the page table
2057 */
2058void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2059{
2060 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2061 PGM_LOCK_ASSERT_OWNER(pVM);
2062 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2063 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2064
2065 if (!pPool->cDirtyPages)
2066 return;
2067
2068 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2069
2070 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2071 {
2072 unsigned idxPage = pPool->aidxDirtyPages[i];
2073 if (idxPage != NIL_PGMPOOL_IDX)
2074 {
2075 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2076 if (pPage->GCPhys == GCPhysPT)
2077 {
2078 idxDirtyPage = i;
2079 break;
2080 }
2081 }
2082 }
2083
2084 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2085 {
2086 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2087 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2088 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2089 {
2090 unsigned i;
2091 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2092 {
2093 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2094 {
2095 pPool->idxFreeDirtyPage = i;
2096 break;
2097 }
2098 }
2099 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2100 }
2101 }
2102}
2103
2104#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2105
2106/**
2107 * Inserts a page into the GCPhys hash table.
2108 *
2109 * @param pPool The pool.
2110 * @param pPage The page.
2111 */
2112DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2113{
2114 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2115 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2116 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2117 pPage->iNext = pPool->aiHash[iHash];
2118 pPool->aiHash[iHash] = pPage->idx;
2119}
2120
2121
2122/**
2123 * Removes a page from the GCPhys hash table.
2124 *
2125 * @param pPool The pool.
2126 * @param pPage The page.
2127 */
2128DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2129{
2130 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2131 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2132 if (pPool->aiHash[iHash] == pPage->idx)
2133 pPool->aiHash[iHash] = pPage->iNext;
2134 else
2135 {
2136 uint16_t iPrev = pPool->aiHash[iHash];
2137 for (;;)
2138 {
2139 const int16_t i = pPool->aPages[iPrev].iNext;
2140 if (i == pPage->idx)
2141 {
2142 pPool->aPages[iPrev].iNext = pPage->iNext;
2143 break;
2144 }
2145 if (i == NIL_PGMPOOL_IDX)
2146 {
2147 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2148 break;
2149 }
2150 iPrev = i;
2151 }
2152 }
2153 pPage->iNext = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Frees up one cache page.
2159 *
2160 * @returns VBox status code.
2161 * @retval VINF_SUCCESS on success.
2162 * @param pPool The pool.
2163 * @param iUser The user index.
2164 */
2165static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2166{
2167#ifndef VBOX_VMM_TARGET_ARMV8
2168 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2169#endif
2170 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2171 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2172
2173 /*
2174 * Select one page from the tail of the age list.
2175 */
2176 PPGMPOOLPAGE pPage;
2177 for (unsigned iLoop = 0; ; iLoop++)
2178 {
2179 uint16_t iToFree = pPool->iAgeTail;
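        /* Don't free the page the caller is allocating for (iUser); take the previous entry
           in the age list instead. */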
2180 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2181 iToFree = pPool->aPages[iToFree].iAgePrev;
2182/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2183 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2184 {
2185 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2186 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2187 {
2188 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2189 continue;
2190 iToFree = i;
2191 break;
2192 }
2193 }
2194*/
2195 Assert(iToFree != iUser);
2196 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2197 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2198 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2199 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2200 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2201 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2202
2203 pPage = &pPool->aPages[iToFree];
2204
2205 /*
2206 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2207 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2208 */
2209 if ( !pgmPoolIsPageLocked(pPage)
2210 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2211 break;
2212 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2213 pgmPoolCacheUsed(pPool, pPage);
2214 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2215 }
2216
2217 /*
2218 * Found a usable page, flush it and return.
2219 */
2220 int rc = pgmPoolFlushPage(pPool, pPage);
2221 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2222 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2223 if (rc == VINF_SUCCESS)
2224 PGM_INVL_ALL_VCPU_TLBS(pVM);
2225 return rc;
2226}
2227
2228
2229/**
2230 * Checks if a kind mismatch is really a page being reused
2231 * or if it's just normal remappings.
2232 *
2233 * @returns true if reused and the cached page (enmKind1) should be flushed
2234 * @returns false if not reused.
2235 * @param enmKind1 The kind of the cached page.
2236 * @param enmKind2 The kind of the requested page.
2237 */
2238static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2239{
2240 switch (enmKind1)
2241 {
2242 /*
2243 * Never reuse them. There is no remapping in non-paging mode.
2244 */
2245 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2246 case PGMPOOLKIND_32BIT_PD_PHYS:
2247 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2248 case PGMPOOLKIND_PAE_PD_PHYS:
2249 case PGMPOOLKIND_PAE_PDPT_PHYS:
2250 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2251 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2252 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2253 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2254 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2255 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2256 return false;
2257
2258 /*
2259 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2260 */
2261 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2262 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2269 case PGMPOOLKIND_32BIT_PD:
2270 case PGMPOOLKIND_PAE_PDPT:
2271 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2272 switch (enmKind2)
2273 {
2274 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2276 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2277 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2278 case PGMPOOLKIND_64BIT_PML4:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2280 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2281 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2284 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2287 return true;
2288 default:
2289 return false;
2290 }
2291
2292 /*
2293 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2294 */
2295 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2296 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2297 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2298 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2299 case PGMPOOLKIND_64BIT_PML4:
2300 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2301 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2302 switch (enmKind2)
2303 {
2304 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2305 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2306 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2307 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2308 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2309 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2311 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2312 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2313 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2314 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2315 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2316 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2317 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2318 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2319 return true;
2320 default:
2321 return false;
2322 }
2323
2324#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2325 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2326 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2327 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2328 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2329 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2330
2331 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2332 return false;
2333#endif
2334
2335 /*
2336 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2337 */
2338 case PGMPOOLKIND_ROOT_NESTED:
2339 return false;
2340
2341 default:
2342 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2343 }
2344}
2345
2346
2347/**
2348 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2349 *
2350 * @returns VBox status code.
2351 * @retval VINF_PGM_CACHED_PAGE on success.
2352 * @retval VERR_FILE_NOT_FOUND if not found.
2353 * @param pPool The pool.
2354 * @param GCPhys The GC physical address of the page we're gonna shadow.
2355 * @param enmKind The kind of mapping.
2356 * @param enmAccess Access type for the mapping (only relevant for big pages)
2357 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2358 * @param iUser The shadow page pool index of the user table. This is
2359 * NIL_PGMPOOL_IDX for root pages.
2360 * @param iUserTable The index into the user table (shadowed). Ignored if
2361 * root page
2362 * @param ppPage Where to store the pointer to the page.
2363 */
2364static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2365 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2366{
2367 /*
2368 * Look up the GCPhys in the hash.
2369 */
2370 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2371 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2372 if (i != NIL_PGMPOOL_IDX)
2373 {
2374 do
2375 {
2376 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2377 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2378 if (pPage->GCPhys == GCPhys)
2379 {
2380 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2381 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2382 && pPage->fA20Enabled == fA20Enabled)
2383 {
2384 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2385 * doesn't flush it in case there are no more free use records.
2386 */
2387 pgmPoolCacheUsed(pPool, pPage);
2388
2389 int rc = VINF_SUCCESS;
2390 if (iUser != NIL_PGMPOOL_IDX)
2391 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2392 if (RT_SUCCESS(rc))
2393 {
2394 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2395 *ppPage = pPage;
2396 if (pPage->cModifications)
2397 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2398 STAM_COUNTER_INC(&pPool->StatCacheHits);
2399 return VINF_PGM_CACHED_PAGE;
2400 }
2401 return rc;
2402 }
2403
2404 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2405 {
2406 /*
2407 * The kind is different. In some cases we should now flush the page
2408 * as it has been reused, but in most cases this is normal remapping
2409 * of PDs as PT or big pages using the GCPhys field in a slightly
2410 * different way than the other kinds.
2411 */
2412 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2413 {
2414 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2415 pgmPoolFlushPage(pPool, pPage);
2416 break;
2417 }
2418 }
2419 }
2420
2421 /* next */
2422 i = pPage->iNext;
2423 } while (i != NIL_PGMPOOL_IDX);
2424 }
2425
2426 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2427 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2428 return VERR_FILE_NOT_FOUND;
2429}
2430
2431
2432/**
2433 * Inserts a page into the cache.
2434 *
2435 * @param pPool The pool.
2436 * @param pPage The cached page.
2437 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2438 */
2439static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2440{
2441 /*
2442 * Insert into the GCPhys hash if the page is fit for that.
2443 */
2444 Assert(!pPage->fCached);
2445 if (fCanBeCached)
2446 {
2447 pPage->fCached = true;
2448 pgmPoolHashInsert(pPool, pPage);
2449 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2450 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2451 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2452 }
2453 else
2454 {
2455 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2456 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2457 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2458 }
2459
2460 /*
2461 * Insert at the head of the age list.
2462 */
2463 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2464 pPage->iAgeNext = pPool->iAgeHead;
2465 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2466 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2467 else
2468 pPool->iAgeTail = pPage->idx;
2469 pPool->iAgeHead = pPage->idx;
2470}
2471
2472
2473/**
2474 * Flushes a cached page.
2475 *
2476 * @param pPool The pool.
2477 * @param pPage The cached page.
2478 */
2479static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2480{
2481 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2482
2483 /*
2484 * Remove the page from the hash.
2485 */
2486 if (pPage->fCached)
2487 {
2488 pPage->fCached = false;
2489 pgmPoolHashRemove(pPool, pPage);
2490 }
2491 else
2492 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2493
2494 /*
2495 * Remove it from the age list.
2496 */
2497 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2498 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2499 else
2500 pPool->iAgeTail = pPage->iAgePrev;
2501 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2502 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2503 else
2504 pPool->iAgeHead = pPage->iAgeNext;
2505 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2506 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2507}
2508
2509
2510/**
2511 * Looks for pages sharing the monitor.
2512 *
2513 * @returns Pointer to the head page.
2514 * @returns NULL if not found.
2515 * @param pPool The Pool
2516 * @param pNewPage The page which is going to be monitored.
2517 */
2518static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2519{
2520 /*
2521 * Look up the GCPhys in the hash.
2522 */
2523 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2524 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2525 if (i == NIL_PGMPOOL_IDX)
2526 return NULL;
2527 do
2528 {
2529 PPGMPOOLPAGE pPage = &pPool->aPages[i];
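        /* Any pool page backed by the same guest page shares the monitor; GCPhys may carry a
           sub-page offset for some kinds, hence the range compare. */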
2530 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2531 && pPage != pNewPage)
2532 {
2533 switch (pPage->enmKind)
2534 {
2535 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2536 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2538 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2539 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2540 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2541 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2542 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2543 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2544 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2545 case PGMPOOLKIND_64BIT_PML4:
2546 case PGMPOOLKIND_32BIT_PD:
2547 case PGMPOOLKIND_PAE_PDPT:
2548#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2549 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2550 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2551 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2552#endif
2553 {
2554 /* find the head */
2555 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2556 {
2557 Assert(pPage->iMonitoredPrev != pPage->idx);
2558 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2559 }
2560 return pPage;
2561 }
2562
2563 /* ignore, no monitoring. */
2564 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2565 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2566 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2567 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2568 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2569 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2570 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2571 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2572 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2573 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2574 case PGMPOOLKIND_ROOT_NESTED:
2575 case PGMPOOLKIND_PAE_PD_PHYS:
2576 case PGMPOOLKIND_PAE_PDPT_PHYS:
2577 case PGMPOOLKIND_32BIT_PD_PHYS:
2578 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2579#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2580 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2581 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2582#endif
2583 break;
2584 default:
2585 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2586 }
2587 }
2588
2589 /* next */
2590 i = pPage->iNext;
2591 } while (i != NIL_PGMPOOL_IDX);
2592 return NULL;
2593}
2594
2595
2596/**
2597 * Enables write monitoring of a guest page.
2598 *
2599 * @returns VBox status code.
2600 * @retval VINF_SUCCESS on success.
2601 * @param pPool The pool.
2602 * @param pPage The cached page.
2603 */
2604static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2605{
2606 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2607
2608 /*
2609 * Filter out the relevant kinds.
2610 */
2611 switch (pPage->enmKind)
2612 {
2613 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2614 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2615 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2616 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2617 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2618 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2619 case PGMPOOLKIND_64BIT_PML4:
2620 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2622 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2623 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2624 case PGMPOOLKIND_32BIT_PD:
2625 case PGMPOOLKIND_PAE_PDPT:
2626 break;
2627
2628 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2629 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2630 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2631 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2632 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2633 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2634 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2635 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2636 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2637 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2638 case PGMPOOLKIND_ROOT_NESTED:
2639 /* Nothing to monitor here. */
2640 return VINF_SUCCESS;
2641
2642 case PGMPOOLKIND_32BIT_PD_PHYS:
2643 case PGMPOOLKIND_PAE_PDPT_PHYS:
2644 case PGMPOOLKIND_PAE_PD_PHYS:
2645 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2646 /* Nothing to monitor here. */
2647 return VINF_SUCCESS;
2648
2649#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2650 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2651 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2652 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2653 break;
2654
2655 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2656 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2657 /* Nothing to monitor here. */
2658 return VINF_SUCCESS;
2659#endif
2660
2661 default:
2662 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2663 }
2664
2665 /*
2666 * Install handler.
2667 */
2668 int rc;
2669 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2670 if (pPageHead)
2671 {
2672 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2673 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2674
2675#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2676 if (pPageHead->fDirty)
2677 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2678#endif
2679
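        /* Another pool page already monitors this guest page: link this page into the
           existing monitoring chain right after the head. */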
2680 pPage->iMonitoredPrev = pPageHead->idx;
2681 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2682 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2683 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2684 pPageHead->iMonitoredNext = pPage->idx;
2685 rc = VINF_SUCCESS;
2686 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2687 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2688 }
2689 else
2690 {
2691 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2692 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2693
2694 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2695 PVMCC pVM = pPool->CTX_SUFF(pVM);
2696 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2697 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2698 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2699 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2700 * the heap size should suffice. */
2701 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2702 PVMCPU pVCpu = VMMGetCpu(pVM);
2703 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2704 }
2705 pPage->fMonitored = true;
2706 return rc;
2707}
2708
2709
2710/**
2711 * Disables write monitoring of a guest page.
2712 *
2713 * @returns VBox status code.
2714 * @retval VINF_SUCCESS on success.
2715 * @param pPool The pool.
2716 * @param pPage The cached page.
2717 */
2718static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2719{
2720 /*
2721 * Filter out the relevant kinds.
2722 */
2723 switch (pPage->enmKind)
2724 {
2725 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2726 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2727 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2728 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2729 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2730 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2731 case PGMPOOLKIND_64BIT_PML4:
2732 case PGMPOOLKIND_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PDPT:
2734 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2735 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2736 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2737 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2738 break;
2739
2740 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2741 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2742 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2743 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2744 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2745 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2746 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2749 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2750 case PGMPOOLKIND_ROOT_NESTED:
2751 case PGMPOOLKIND_PAE_PD_PHYS:
2752 case PGMPOOLKIND_PAE_PDPT_PHYS:
2753 case PGMPOOLKIND_32BIT_PD_PHYS:
2754 /* Nothing to monitor here. */
2755 Assert(!pPage->fMonitored);
2756 return VINF_SUCCESS;
2757
2758#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2759 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2760 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2761 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2762 break;
2763
2764 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2765 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2766 /* Nothing to monitor here. */
2767 Assert(!pPage->fMonitored);
2768 return VINF_SUCCESS;
2769#endif
2770
2771 default:
2772 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2773 }
2774 Assert(pPage->fMonitored);
2775
2776 /*
2777 * Remove the page from the monitored list or uninstall it if last.
2778 */
2779 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2780 int rc;
2781 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2782 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2783 {
2784 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2785 {
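            /* This page is the chain head: promote the next page to head and re-point the
               physical handler's user argument at it. */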
2786 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2787 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2788 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2789
2790 AssertFatalRCSuccess(rc);
2791 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2792 }
2793 else
2794 {
2795 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2796 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2797 {
2798 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2799 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2800 }
2801 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2802 rc = VINF_SUCCESS;
2803 }
2804 }
2805 else
2806 {
2807 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2808 AssertFatalRC(rc);
2809 PVMCPU pVCpu = VMMGetCpu(pVM);
2810 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2811 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2812 }
2813 pPage->fMonitored = false;
2814
2815 /*
2816 * Remove it from the list of modified pages (if in it).
2817 */
2818 pgmPoolMonitorModifiedRemove(pPool, pPage);
2819
2820 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2821 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2822
2823 return rc;
2824}
2825
2826
2827/**
2828 * Inserts the page into the list of modified pages.
2829 *
2830 * @param pPool The pool.
2831 * @param pPage The page.
2832 */
2833void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2834{
2835 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2836 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2837 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2838 && pPool->iModifiedHead != pPage->idx,
2839 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2840 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2841 pPool->iModifiedHead, pPool->cModifiedPages));
2842
2843 pPage->iModifiedNext = pPool->iModifiedHead;
2844 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2845 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2846 pPool->iModifiedHead = pPage->idx;
2847 pPool->cModifiedPages++;
2848#ifdef VBOX_WITH_STATISTICS
2849 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2850 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2851#endif
2852}
2853
2854
2855/**
2856 * Removes the page from the list of modified pages and resets the
2857 * modification counter.
2858 *
2859 * @param pPool The pool.
2860 * @param pPage The page which is believed to be in the list of modified pages.
2861 */
2862static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2863{
2864 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2865 if (pPool->iModifiedHead == pPage->idx)
2866 {
2867 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2868 pPool->iModifiedHead = pPage->iModifiedNext;
2869 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2870 {
2871 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2872 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2873 }
2874 pPool->cModifiedPages--;
2875 }
2876 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2877 {
2878 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2879 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2880 {
2881 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2882 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2883 }
2884 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2885 pPool->cModifiedPages--;
2886 }
2887 else
2888 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2889 pPage->cModifications = 0;
2890}
2891
2892
2893/**
2894 * Zaps the list of modified pages, resetting their modification counters in the process.
2895 *
2896 * @param pVM The cross context VM structure.
2897 */
2898static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2899{
2900 PGM_LOCK_VOID(pVM);
2901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2902 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2903
2904 unsigned cPages = 0; NOREF(cPages);
2905
2906#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2907 pgmPoolResetDirtyPages(pVM);
2908#endif
2909
2910 uint16_t idx = pPool->iModifiedHead;
2911 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2912 while (idx != NIL_PGMPOOL_IDX)
2913 {
2914 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2915 idx = pPage->iModifiedNext;
2916 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2917 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2918 pPage->cModifications = 0;
2919 Assert(++cPages);
2920 }
2921 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2922 pPool->cModifiedPages = 0;
2923 PGM_UNLOCK(pVM);
2924}
2925
2926
2927/**
2928 * Handle SyncCR3 pool tasks
2929 *
2930 * @returns VBox status code.
2931 * @retval VINF_SUCCESS if successfully added.
2932 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2933 * @param pVCpu The cross context virtual CPU structure.
2934 * @remark Should only be used when monitoring is available, thus placed in
2935 * the PGMPOOL_WITH_MONITORING \#ifdef.
2936 */
2937int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2938{
2939 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2940 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2941
2942 /*
2943 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2944 * Occasionally we will have to clear all the shadow page tables because we wanted
2945 * to monitor a page which was mapped by too many shadowed page tables. This operation
2946 * is sometimes referred to as a 'lightweight flush'.
2947 */
2948# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2949 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2950 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2951# else /* !IN_RING3 */
2952 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2953 {
2954 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2955 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2956
2957 /* Make sure all other VCPUs return to ring 3. */
2958 if (pVM->cCpus > 1)
2959 {
2960 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2961 PGM_INVL_ALL_VCPU_TLBS(pVM);
2962 }
2963 return VINF_PGM_SYNC_CR3;
2964 }
2965# endif /* !IN_RING3 */
2966 else
2967 {
2968 pgmPoolMonitorModifiedClearAll(pVM);
2969
2970 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2971 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2972 {
2973 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2974 return pgmPoolSyncCR3(pVCpu);
2975 }
2976 }
2977 return VINF_SUCCESS;
2978}
2979
2980
2981/**
2982 * Frees up at least one user entry.
2983 *
2984 * @returns VBox status code.
2985 * @retval VINF_SUCCESS on success.
2986 *
2987 * @param pPool The pool.
2988 * @param iUser The user index.
2989 */
2990static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2991{
2992 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2993 /*
2994 * Just free cached pages in a braindead fashion.
2995 */
2996 /** @todo walk the age list backwards and free the first with usage. */
2997 int rc = VINF_SUCCESS;
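    /* Keep freeing cache pages until at least one user record is back on the free list. */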
2998 do
2999 {
3000 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
3001 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
3002 rc = rc2;
3003 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
3004 return rc;
3005}
3006
3007
3008/**
3009 * Inserts a page into the cache.
3010 *
3011 * This will create user node for the page, insert it into the GCPhys
3012 * hash, and insert it into the age list.
3013 *
3014 * @returns VBox status code.
3015 * @retval VINF_SUCCESS if successfully added.
3016 *
3017 * @param pPool The pool.
3018 * @param pPage The cached page.
3019 * @param GCPhys The GC physical address of the page we're gonna shadow.
3020 * @param iUser The user index.
3021 * @param iUserTable The user table index.
3022 */
3023DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3024{
3025 int rc = VINF_SUCCESS;
3026 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3027
3028 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3029
3030 if (iUser != NIL_PGMPOOL_IDX)
3031 {
3032#ifdef VBOX_STRICT
3033 /*
3034 * Check that the entry doesn't already exist.
3035 */
3036 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3037 {
3038 uint16_t i = pPage->iUserHead;
3039 do
3040 {
3041 Assert(i < pPool->cMaxUsers);
3042 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3043 i = paUsers[i].iNext;
3044 } while (i != NIL_PGMPOOL_USER_INDEX);
3045 }
3046#endif
3047
3048 /*
3049 * Find a free user node.
3050 */
3051 uint16_t i = pPool->iUserFreeHead;
3052 if (i == NIL_PGMPOOL_USER_INDEX)
3053 {
3054 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3055 if (RT_FAILURE(rc))
3056 return rc;
3057 i = pPool->iUserFreeHead;
3058 }
3059
3060 /*
3061 * Unlink the user node from the free list,
3062 * initialize and insert it into the user list.
3063 */
3064 pPool->iUserFreeHead = paUsers[i].iNext;
3065 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3066 paUsers[i].iUser = iUser;
3067 paUsers[i].iUserTable = iUserTable;
3068 pPage->iUserHead = i;
3069 }
3070 else
3071 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3072
3073
3074 /*
3075 * Insert into cache and enable monitoring of the guest page if enabled.
3076 *
3077 * Until we implement caching of all levels, including the CR3 one, we'll
3078 * have to make sure we don't try to monitor & cache any recursive reuse of
3079 * a monitored CR3 page. Because all Windows versions are doing this we'll
3080 * have to be able to do combined access monitoring, CR3 + PT and
3081 * PD + PT (guest PAE).
3082 *
3083 * Update:
3084 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3085 */
3086 const bool fCanBeMonitored = true;
3087 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3088 if (fCanBeMonitored)
3089 {
3090 rc = pgmPoolMonitorInsert(pPool, pPage);
3091 AssertRC(rc);
3092 }
3093 return rc;
3094}
3095
3096
3097/**
3098 * Adds a user reference to a page.
3099 *
3100 * This will move the page to the head of the age list.
3101 *
3102 * @returns VBox status code.
3103 * @retval VINF_SUCCESS if successfully added.
3104 *
3105 * @param pPool The pool.
3106 * @param pPage The cached page.
3107 * @param iUser The user index.
3108 * @param iUserTable The user table.
3109 */
3110static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3111{
3112 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3113 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3114 Assert(iUser != NIL_PGMPOOL_IDX);
3115
3116# ifdef VBOX_STRICT
3117 /*
3118 * Check that the entry doesn't already exist. We only allow multiple
3119 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3120 */
3121 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3122 {
3123 uint16_t i = pPage->iUserHead;
3124 do
3125 {
3126 Assert(i < pPool->cMaxUsers);
3127 /** @todo this assertion looks odd... Shouldn't it be && here? */
3128 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3129 i = paUsers[i].iNext;
3130 } while (i != NIL_PGMPOOL_USER_INDEX);
3131 }
3132# endif
3133
3134 /*
3135 * Allocate a user node.
3136 */
3137 uint16_t i = pPool->iUserFreeHead;
3138 if (i == NIL_PGMPOOL_USER_INDEX)
3139 {
3140 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3141 if (RT_FAILURE(rc))
3142 return rc;
3143 i = pPool->iUserFreeHead;
3144 }
3145 pPool->iUserFreeHead = paUsers[i].iNext;
3146
3147 /*
3148 * Initialize the user node and insert it.
3149 */
3150 paUsers[i].iNext = pPage->iUserHead;
3151 paUsers[i].iUser = iUser;
3152 paUsers[i].iUserTable = iUserTable;
3153 pPage->iUserHead = i;
3154
3155# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3156 if (pPage->fDirty)
3157 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3158# endif
3159
3160 /*
3161 * Tell the cache to update its replacement stats for this page.
3162 */
3163 pgmPoolCacheUsed(pPool, pPage);
3164 return VINF_SUCCESS;
3165}
3166
3167
3168/**
3169 * Frees a user record associated with a page.
3170 *
3171 * This does not clear the entry in the user table, it simply returns the
3172 * user record to the chain of free records.
3173 *
3174 * @param pPool The pool.
3175 * @param pPage The shadow page.
3176 * @param iUser The shadow page pool index of the user table.
3177 * @param iUserTable The index into the user table (shadowed).
3178 *
3179 * @remarks Don't call this for root pages.
3180 */
3181static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3182{
3183 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3184 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3185 Assert(iUser != NIL_PGMPOOL_IDX);
3186
3187 /*
3188 * Unlink and free the specified user entry.
3189 */
3190
3191 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3192 uint16_t i = pPage->iUserHead;
3193 if ( i != NIL_PGMPOOL_USER_INDEX
3194 && paUsers[i].iUser == iUser
3195 && paUsers[i].iUserTable == iUserTable)
3196 {
3197 pPage->iUserHead = paUsers[i].iNext;
3198
3199 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3200 paUsers[i].iNext = pPool->iUserFreeHead;
3201 pPool->iUserFreeHead = i;
3202 return;
3203 }
3204
3205 /* General: Linear search. */
3206 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3207 while (i != NIL_PGMPOOL_USER_INDEX)
3208 {
3209 if ( paUsers[i].iUser == iUser
3210 && paUsers[i].iUserTable == iUserTable)
3211 {
3212 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3213 paUsers[iPrev].iNext = paUsers[i].iNext;
3214 else
3215 pPage->iUserHead = paUsers[i].iNext;
3216
3217 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3218 paUsers[i].iNext = pPool->iUserFreeHead;
3219 pPool->iUserFreeHead = i;
3220 return;
3221 }
3222 iPrev = i;
3223 i = paUsers[i].iNext;
3224 }
3225
3226 /* Fatal: didn't find it */
3227 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3228 iUser, iUserTable, pPage->GCPhys));
3229}
3230
3231
3232#if 0 /* unused */
3233/**
3234 * Gets the entry size of a shadow table.
3235 *
3236 * @param enmKind The kind of page.
3237 *
3238 * @returns The size of the entry in bytes. That is, 4 or 8.
3239 * @returns If the kind is not for a table, an assertion is raised and 0 is
3240 * returned.
3241 */
3242DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3243{
3244 switch (enmKind)
3245 {
3246 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3247 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3249 case PGMPOOLKIND_32BIT_PD:
3250 case PGMPOOLKIND_32BIT_PD_PHYS:
3251 return 4;
3252
3253 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3254 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3255 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3256 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3257 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3258 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3259 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3260 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3261 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3262 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3263 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3264 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3265 case PGMPOOLKIND_64BIT_PML4:
3266 case PGMPOOLKIND_PAE_PDPT:
3267 case PGMPOOLKIND_ROOT_NESTED:
3268 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3269 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3270 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3271 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3272 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3273 case PGMPOOLKIND_PAE_PD_PHYS:
3274 case PGMPOOLKIND_PAE_PDPT_PHYS:
3275 return 8;
3276
3277 default:
3278 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3279 }
3280}
3281#endif /* unused */
3282
3283#if 0 /* unused */
3284/**
3285 * Gets the entry size of a guest table.
3286 *
3287 * @param enmKind The kind of page.
3288 *
3289 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3290 * @returns If the kind is not for a table, an assertion is raised and 0 is
3291 * returned.
3292 */
3293DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3294{
3295 switch (enmKind)
3296 {
3297 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3298 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3299 case PGMPOOLKIND_32BIT_PD:
3300 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3301 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3302 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3303 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3304 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3305 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3306 return 4;
3307
3308 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3309 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3310 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3311 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3312 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3313 case PGMPOOLKIND_64BIT_PML4:
3314 case PGMPOOLKIND_PAE_PDPT:
3315 return 8;
3316
3317 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3318 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3319 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3320 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3321 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3322 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3323 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3324 case PGMPOOLKIND_ROOT_NESTED:
3325 case PGMPOOLKIND_PAE_PD_PHYS:
3326 case PGMPOOLKIND_PAE_PDPT_PHYS:
3327 case PGMPOOLKIND_32BIT_PD_PHYS:
3328 /** @todo can we return 0? (nobody is calling this...) */
3329 AssertFailed();
3330 return 0;
3331
3332 default:
3333 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3334 }
3335}
3336#endif /* unused */
3337
3338
3339/**
3340 * Checks one shadow page table entry for a mapping of a physical page.
3341 *
3342 * @returns true if the PTE was kept (only updated), false if it was removed.
3343 *
3344 * @param pVM The cross context VM structure.
3345 * @param pPhysPage The guest page in question.
3346 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3347 * @param iShw The shadow page table.
3348 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3349 */
3350static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3351{
3352 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3353 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3354 bool fRet = false;
3355
3356 /*
3357 * Assert sanity.
3358 */
3359 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3360 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3361 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3362
3363 /*
3364 * Then, clear the actual mappings to the page in the shadow PT.
3365 */
3366 switch (pPage->enmKind)
3367 {
3368 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3369 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3370 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3371 {
3372 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3373 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3374 uint32_t u32AndMask = 0;
3375 uint32_t u32OrMask = 0;
3376
3377 if (!fFlushPTEs)
3378 {
3379 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3380 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3381 {
3382 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3383 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3384 u32OrMask = X86_PTE_RW;
3385 u32AndMask = UINT32_MAX;
3386 fRet = true;
3387 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3388 break;
3389
3390 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3391 u32OrMask = 0;
3392 u32AndMask = ~X86_PTE_RW;
3393 fRet = true;
3394 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3395 break;
3396 default:
3397 /* We will end up here when called with an "ALL" access handler. */
3398 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3399 break;
3400 }
3401 }
3402 else
3403 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3404
3405 /* Update the counter if we're removing references. */
3406 if (!u32AndMask)
3407 {
3408 Assert(pPage->cPresent);
3409 Assert(pPool->cPresent);
3410 pPage->cPresent--;
3411 pPool->cPresent--;
3412 }
3413
3414 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3415 {
3416 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3417 X86PTE Pte;
3418 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3419 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3420 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3421
3422 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3423 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3424 return fRet;
3425 }
3426#ifdef LOG_ENABLED
3427 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3428 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3429 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3430 {
3431 Log(("i=%d cFound=%d\n", i, ++cFound));
3432 }
3433#endif
3434 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3435 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3436 break;
3437 }
3438
3439 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3440 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3441 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3442 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3443 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3444 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3445#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3446 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3447# ifdef PGM_WITH_LARGE_PAGES
3448 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3449# endif
3450#endif
3451 {
3452 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3453 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3454 uint64_t u64OrMask = 0;
3455 uint64_t u64AndMask = 0;
3456
3457 if (!fFlushPTEs)
3458 {
3459 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3460 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3461 {
3462 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3463 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3464 u64OrMask = X86_PTE_RW;
3465 u64AndMask = UINT64_MAX;
3466 fRet = true;
3467 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3468 break;
3469
3470 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3471 u64OrMask = 0;
3472 u64AndMask = ~(uint64_t)X86_PTE_RW;
3473 fRet = true;
3474 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3475 break;
3476
3477 default:
3478 /* We will end up here when called with an "ALL" access handler. */
3479 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3480 break;
3481 }
3482 }
3483 else
3484 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3485
3486 /* Update the counter if we're removing references. */
3487 if (!u64AndMask)
3488 {
3489 Assert(pPage->cPresent);
3490 Assert(pPool->cPresent);
3491 pPage->cPresent--;
3492 pPool->cPresent--;
3493 }
3494
3495 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3496 {
3497 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3498 X86PTEPAE Pte;
3499 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3500 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3501 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3502
3503 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3504 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3505 return fRet;
3506 }
3507#ifdef LOG_ENABLED
3508 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3509 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3510 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3511 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3512 Log(("i=%d cFound=%d\n", i, ++cFound));
3513#endif
3514 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3515 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3516 break;
3517 }
3518
3519#ifdef PGM_WITH_LARGE_PAGES
3520 /* Large page case only. */
3521 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3522 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
3523 {
3524 Assert(pVM->pgm.s.fNestedPaging);
3525
3526 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3527 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3528
3529 Assert( pPage->enmKind != PGMPOOLKIND_EPT_PD_FOR_EPT_PD
3530 || (pPD->a[iPte].u & EPT_E_LEAF));
3531
3532 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3533 {
3534 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3535 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3536 pPD->a[iPte].u = 0;
3537 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3538
3539 /* Update the counter as we're removing references. */
3540 Assert(pPage->cPresent);
3541 Assert(pPool->cPresent);
3542 pPage->cPresent--;
3543 pPool->cPresent--;
3544
3545 return fRet;
3546 }
3547# ifdef LOG_ENABLED
3548 LogRel(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3549 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3550 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3551 {
3552 cFound++;
3553 LogRel(("i=%d cFound=%d\n", i, cFound));
3554 }
3555# endif
3556 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d enmKind=%d\n", pPage->iFirstPresent, pPage->cPresent, pPage->enmKind));
3557 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3558 break;
3559 }
3560
3561 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3562 case PGMPOOLKIND_PAE_PD_PHYS:
3563 {
3564 Assert(pVM->pgm.s.fNestedPaging);
3565
3566 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3567 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3568
3569 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3570 {
3571 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3572 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3573 pPD->a[iPte].u = 0;
3574 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3575
3576 /* Update the counter as we're removing references. */
3577 Assert(pPage->cPresent);
3578 Assert(pPool->cPresent);
3579 pPage->cPresent--;
3580 pPool->cPresent--;
3581 return fRet;
3582 }
3583# ifdef LOG_ENABLED
3584 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3585 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3586 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3587 Log(("i=%d cFound=%d\n", i, ++cFound));
3588# endif
3589 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3590 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3591 break;
3592 }
3593#endif /* PGM_WITH_LARGE_PAGES */
3594
3595 default:
3596 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3597 }
3598
3599 /* not reached. */
3600#ifndef _MSC_VER
3601 return fRet;
3602#endif
3603}
3604
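/*
 * Illustrative sketch, not part of the original source: the keep-or-flush
 * decision made in the switch above (PAE/64-bit variant), factored out for
 * clarity. The handler-state constants are the ones used above; the helper
 * name and out-parameters are hypothetical.
 */
#if 0
static bool pgmPoolExamplePteKeepMasks(unsigned uHndlState, uint64_t *puOrMask, uint64_t *puAndMask)
{
    switch (uHndlState)
    {
        case PGM_PAGE_HNDL_PHYS_STATE_NONE:      /* No handler installed. */
        case PGM_PAGE_HNDL_PHYS_STATE_DISABLED:  /* Monitoring temporarily disabled. */
            *puOrMask  = X86_PTE_RW;             /* Keep the PTE and re-enable writes. */
            *puAndMask = UINT64_MAX;
            return true;                         /* PTE is kept. */

        case PGM_PAGE_HNDL_PHYS_STATE_WRITE:     /* Write access is monitored. */
            *puOrMask  = 0;                      /* Keep the PTE but make it read-only. */
            *puAndMask = ~(uint64_t)X86_PTE_RW;
            return true;                         /* PTE is kept. */

        default:                                 /* "ALL" access handler. */
            *puOrMask  = 0;                      /* Drop the mapping entirely. */
            *puAndMask = 0;
            return false;                        /* PTE is removed. */
    }
}
#endif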
3605
3606/**
3607 * Scans one shadow page table for mappings of a physical page.
3608 *
3609 * @param pVM The cross context VM structure.
3610 * @param pPhysPage The guest page in question.
3611 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3612 * @param iShw The shadow page table.
3613 */
3614static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3615{
3616 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3617
3618 /* We should only come here when there's only one reference to this physical page. */
3619 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3620
3621 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3622 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3623 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3624 if (!fKeptPTEs)
3625 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3626 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3627}
3628
3629
3630/**
3631 * Flushes a list of shadow page tables mapping the same physical page.
3632 *
3633 * @param pVM The cross context VM structure.
3634 * @param pPhysPage The guest page in question.
3635 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3636 * @param iPhysExt The physical cross reference extent list to flush.
3637 */
3638static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3639{
3640 PGM_LOCK_ASSERT_OWNER(pVM);
3641 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3642 bool fKeepList = false;
3643
3644 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3645 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3646
3647 const uint16_t iPhysExtStart = iPhysExt;
3648 PPGMPOOLPHYSEXT pPhysExt;
3649 do
3650 {
3651 Assert(iPhysExt < pPool->cMaxPhysExts);
3652 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3653 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3654 {
3655 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3656 {
3657 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3658 if (!fKeptPTEs)
3659 {
3660 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3661 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3662 }
3663 else
3664 fKeepList = true;
3665 }
3666 }
3667 /* next */
3668 iPhysExt = pPhysExt->iNext;
3669 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3670
3671 if (!fKeepList)
3672 {
3673 /* insert the list into the free list and clear the ram range entry. */
3674 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3675 pPool->iPhysExtFreeHead = iPhysExtStart;
3676 /* Invalidate the tracking data. */
3677 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3678 }
3679
3680 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3681}
3682
3683
3684/**
3685 * Flushes all shadow page table mappings of the given guest page.
3686 *
3687 * This is typically called when the host page backing the guest one has been
3688 * replaced or when the page protection was changed due to a guest access
3689 * caught by the monitoring.
3690 *
3691 * @returns VBox status code.
3692 * @retval VINF_SUCCESS if all references have been successfully cleared.
3693 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3694 * pool cleaning. FF and sync flags are set.
3695 *
3696 * @param pVM The cross context VM structure.
3697 * @param GCPhysPage GC physical address of the page in question
3698 * @param pPhysPage The guest page in question.
3699 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3700 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3701 * flushed; it is NOT touched if this isn't necessary.
3702 * The caller MUST initialize this to @a false.
3703 */
3704int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3705{
3706 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3707 PGM_LOCK_VOID(pVM);
3708 int rc = VINF_SUCCESS;
3709
3710#ifdef PGM_WITH_LARGE_PAGES
3711 /* Is this page part of a large page? */
3712 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3713 {
3714 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3715 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3716
3717 /* Fetch the large page base. */
3718 PPGMPAGE pLargePage;
3719 if (GCPhysBase != GCPhysPage)
3720 {
3721 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3722 AssertFatal(pLargePage);
3723 }
3724 else
3725 pLargePage = pPhysPage;
3726
3727 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3728
3729 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3730 {
3731 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3732 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3733 pVM->pgm.s.cLargePagesDisabled++;
3734
3735 /* Update the base as *only* that one has a reference and there's only one PDE to clear. */
3736 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3737
3738 *pfFlushTLBs = true;
3739 PGM_UNLOCK(pVM);
3740 return rc;
3741 }
3742 }
3743#else
3744 NOREF(GCPhysPage);
3745#endif /* PGM_WITH_LARGE_PAGES */
3746
3747 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3748 if (u16)
3749 {
3750 /*
3751 * The zero page is currently screwing up the tracking and we'll
3752 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3753 * is defined, zero pages won't normally be mapped. Some kind of solution
3754 * will be needed for this problem of course, but it will have to wait...
3755 */
3756# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC /* end up guruing after pgmR0PhysAllocateLargePage otherwise. */
3757 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3758 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3759# else
3760 if (PGM_PAGE_IS_BALLOONED(pPhysPage))
3761# endif
3762 rc = VINF_PGM_GCPHYS_ALIASED;
3763 else
3764 {
3765 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3766 {
3767 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3768 pgmPoolTrackFlushGCPhysPT(pVM,
3769 pPhysPage,
3770 fFlushPTEs,
3771 PGMPOOL_TD_GET_IDX(u16));
3772 }
3773 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3774 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3775 else
3776 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3777 *pfFlushTLBs = true;
3778 }
3779 }
3780
3781 if (rc == VINF_PGM_GCPHYS_ALIASED)
3782 {
3783 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3784 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3785 rc = VINF_PGM_SYNC_CR3;
3786 }
3787 PGM_UNLOCK(pVM);
3788 return rc;
3789}
3790
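/*
 * Illustrative sketch, not part of the original source: how the 16-bit
 * tracking word fetched with PGM_PAGE_GET_TRACKING() above is interpreted.
 * Only the PGMPOOL_TD_* macros used above are assumed; the helper name is
 * made up.
 */
#if 0
static const char *pgmPoolExampleTrackingKind(uint16_t u16)
{
    if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
        return "single reference: idx is the shadow page table index";
    if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
        return "multiple references: idx is the head of a phys ext list";
    return "overflowed: a slow scan of all shadow page tables is required";
}
#endif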
3791
3792/**
3793 * Scans all shadow page tables for mappings of a physical page.
3794 *
3795 * This may be slow, but it's most likely more efficient than cleaning
3796 * out the entire page pool / cache.
3797 *
3798 * @returns VBox status code.
3799 * @retval VINF_SUCCESS if all references have been successfully cleared.
3800 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3801 * a page pool cleaning.
3802 *
3803 * @param pVM The cross context VM structure.
3804 * @param pPhysPage The guest page in question.
3805 */
3806int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3807{
3808 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3809 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3810 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3811 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3812
3813 /*
3814 * There is a limit to what makes sense.
3815 */
3816 if ( pPool->cPresent > 1024
3817 && pVM->cCpus == 1)
3818 {
3819 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3820 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3821 return VINF_PGM_GCPHYS_ALIASED;
3822 }
3823
3824 /*
3825 * Iterate all the pages until we've encountered all that are in use.
3826 * This is a simple but not quite optimal solution.
3827 */
3828 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3829 unsigned cLeft = pPool->cUsedPages;
3830 unsigned iPage = pPool->cCurPages;
3831 while (--iPage >= PGMPOOL_IDX_FIRST)
3832 {
3833 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3834 if ( pPage->GCPhys != NIL_RTGCPHYS
3835 && pPage->cPresent)
3836 {
3837 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3838 switch (pPage->enmKind)
3839 {
3840 /*
3841 * We only care about shadow page tables.
3842 */
3843 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3844 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3845 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3846 {
3847 const uint32_t u32 = (uint32_t)u64;
3848 unsigned cPresent = pPage->cPresent;
3849 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3850 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3851 {
3852 const X86PGUINT uPte = pPT->a[i].u;
3853 if (uPte & X86_PTE_P)
3854 {
3855 if ((uPte & X86_PTE_PG_MASK) == u32)
3856 {
3857 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3858 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3859
3860 /* Update the counter as we're removing references. */
3861 Assert(pPage->cPresent);
3862 Assert(pPool->cPresent);
3863 pPage->cPresent--;
3864 pPool->cPresent--;
3865 }
3866 if (!--cPresent)
3867 break;
3868 }
3869 }
3870 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3871 break;
3872 }
3873
3874 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3875 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3876 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3877 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3878 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3879 {
3880 unsigned cPresent = pPage->cPresent;
3881 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3882 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3883 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3884 {
3885 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3886 {
3887 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3888 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3889
3890 /* Update the counter as we're removing references. */
3891 Assert(pPage->cPresent);
3892 Assert(pPool->cPresent);
3893 pPage->cPresent--;
3894 pPool->cPresent--;
3895 }
3896 if (!--cPresent)
3897 break;
3898 }
3899 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3900 break;
3901 }
3902
3903 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3904 {
3905 unsigned cPresent = pPage->cPresent;
3906 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3907 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3908 {
3909 X86PGPAEUINT const uPte = pPT->a[i].u;
3910 if (uPte & EPT_E_READ)
3911 {
3912 if ((uPte & EPT_PTE_PG_MASK) == u64)
3913 {
3914 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3915 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3916
3917 /* Update the counter as we're removing references. */
3918 Assert(pPage->cPresent);
3919 Assert(pPool->cPresent);
3920 pPage->cPresent--;
3921 pPool->cPresent--;
3922 }
3923 if (!--cPresent)
3924 break;
3925 }
3926 }
3927 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3928 break;
3929 }
3930 }
3931
3932 if (!--cLeft)
3933 break;
3934 }
3935 }
3936
3937 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3938 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3939
3940 /*
3941 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3942 */
3943 if (pPool->cPresent > 1024)
3944 {
3945 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3946 return VINF_PGM_GCPHYS_ALIASED;
3947 }
3948
3949 return VINF_SUCCESS;
3950}
3951
3952
3953/**
3954 * Clears the user entry in a user table.
3955 *
3956 * This is used to remove all references to a page when flushing it.
3957 */
3958static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3959{
3960 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3961 Assert(pUser->iUser < pPool->cCurPages);
3962 uint32_t iUserTable = pUser->iUserTable;
3963
3964 /*
3965 * Map the user page. Ignore references made by fictitious pages.
3966 */
3967 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3968 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3969 union
3970 {
3971 uint64_t *pau64;
3972 uint32_t *pau32;
3973 } u;
3974 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3975 {
3976 Assert(!pUserPage->pvPageR3);
3977 return;
3978 }
3979 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3980
3981
3982 /* Safety precaution in case we change the paging for other modes too in the future. */
3983 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3984
3985#ifdef VBOX_STRICT
3986 /*
3987 * Some sanity checks.
3988 */
3989 switch (pUserPage->enmKind)
3990 {
3991 case PGMPOOLKIND_32BIT_PD:
3992 case PGMPOOLKIND_32BIT_PD_PHYS:
3993 Assert(iUserTable < X86_PG_ENTRIES);
3994 break;
3995 case PGMPOOLKIND_PAE_PDPT:
3996 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3997 case PGMPOOLKIND_PAE_PDPT_PHYS:
3998 Assert(iUserTable < 4);
3999 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4000 break;
4001 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4002 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4003 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4004 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4005 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4006 case PGMPOOLKIND_PAE_PD_PHYS:
4007 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4008 break;
4009 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4010 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4011 break;
4012 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4013 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4014 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4015 break;
4016 case PGMPOOLKIND_64BIT_PML4:
4017 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4018 /* GCPhys >> PAGE_SHIFT is the index here */
4019 break;
4020 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4021 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4022 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4023 break;
4024
4025 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4026 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4027 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4028 break;
4029
4030 case PGMPOOLKIND_ROOT_NESTED:
4031 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4032 break;
4033
4034# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4035 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4036 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4037 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4038 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4039 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4040 Assert(iUserTable < EPT_PG_ENTRIES);
4041 break;
4042# endif
4043
4044 default:
4045 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4046 break;
4047 }
4048#endif /* VBOX_STRICT */
4049
4050 /*
4051 * Clear the entry in the user page.
4052 */
4053 switch (pUserPage->enmKind)
4054 {
4055 /* 32-bit entries */
4056 case PGMPOOLKIND_32BIT_PD:
4057 case PGMPOOLKIND_32BIT_PD_PHYS:
4058 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4059 break;
4060
4061 /* 64-bit entries */
4062 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4063 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4064 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4065 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4066 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4067 case PGMPOOLKIND_PAE_PD_PHYS:
4068 case PGMPOOLKIND_PAE_PDPT_PHYS:
4069 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4070 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4071 case PGMPOOLKIND_64BIT_PML4:
4072 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4073 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4074 case PGMPOOLKIND_PAE_PDPT:
4075 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4076 case PGMPOOLKIND_ROOT_NESTED:
4077 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4078 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4079# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4080 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4081 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4082 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4083 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4084 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4085#endif
4086 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4087 break;
4088
4089 default:
4090 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4091 }
4092 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4093}
4094
4095
4096/**
4097 * Clears all users of a page.
4098 */
4099static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4100{
4101 /*
4102 * Free all the user records.
4103 */
4104 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4105
4106 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4107 uint16_t i = pPage->iUserHead;
4108 while (i != NIL_PGMPOOL_USER_INDEX)
4109 {
4110 /* Clear the entry in the user table. */
4111 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4112
4113 /* Free it. */
4114 const uint16_t iNext = paUsers[i].iNext;
4115 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4116 paUsers[i].iNext = pPool->iUserFreeHead;
4117 pPool->iUserFreeHead = i;
4118
4119 /* Next. */
4120 i = iNext;
4121 }
4122 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4123}
4124
4125
4126/**
4127 * Allocates a new physical cross reference extent.
4128 *
4129 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4130 * @param pVM The cross context VM structure.
4131 * @param piPhysExt Where to store the phys ext index.
4132 */
4133PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4134{
4135 PGM_LOCK_ASSERT_OWNER(pVM);
4136 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4137 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4138 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4139 {
4140 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4141 return NULL;
4142 }
4143 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4144 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4145 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4146 *piPhysExt = iPhysExt;
4147 return pPhysExt;
4148}
4149
4150
4151/**
4152 * Frees a physical cross reference extent.
4153 *
4154 * @param pVM The cross context VM structure.
4155 * @param iPhysExt The extent to free.
4156 */
4157void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4158{
4159 PGM_LOCK_ASSERT_OWNER(pVM);
4160 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4161 Assert(iPhysExt < pPool->cMaxPhysExts);
4162 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4163 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4164 {
4165 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4166 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4167 }
4168 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4169 pPool->iPhysExtFreeHead = iPhysExt;
4170}
4171
4172
4173/**
4174 * Frees a list of physical cross reference extents.
4175 *
4176 * @param pVM The cross context VM structure.
4177 * @param iPhysExt The index of the first extent in the list to free.
4178 */
4179void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4180{
4181 PGM_LOCK_ASSERT_OWNER(pVM);
4182 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4183
4184 const uint16_t iPhysExtStart = iPhysExt;
4185 PPGMPOOLPHYSEXT pPhysExt;
4186 do
4187 {
4188 Assert(iPhysExt < pPool->cMaxPhysExts);
4189 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4190 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4191 {
4192 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4193 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4194 }
4195
4196 /* next */
4197 iPhysExt = pPhysExt->iNext;
4198 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4199
4200 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4201 pPool->iPhysExtFreeHead = iPhysExtStart;
4202}
4203
4204
4205/**
4206 * Insert a reference into a list of physical cross reference extents.
4207 *
4208 * @returns The new tracking data for PGMPAGE.
4209 *
4210 * @param pVM The cross context VM structure.
4211 * @param iPhysExt The physical extent index of the list head.
4212 * @param iShwPT The shadow page table index.
4213 * @param iPte Page table entry
4214 *
4215 */
4216static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4217{
4218 PGM_LOCK_ASSERT_OWNER(pVM);
4219 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4220 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4221
4222 /*
4223 * Special common cases.
4224 */
4225 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4226 {
4227 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4228 paPhysExts[iPhysExt].apte[1] = iPte;
4229 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4230 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4231 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4232 }
4233 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4234 {
4235 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4236 paPhysExts[iPhysExt].apte[2] = iPte;
4237 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4238 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4239 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4240 }
4241 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4242
4243 /*
4244 * General treatment.
4245 */
4246 const uint16_t iPhysExtStart = iPhysExt;
4247 unsigned cMax = 15;
4248 for (;;)
4249 {
4250 Assert(iPhysExt < pPool->cMaxPhysExts);
4251 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4252 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4253 {
4254 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4255 paPhysExts[iPhysExt].apte[i] = iPte;
4256 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4257 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4258 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4259 }
4260 if (!--cMax)
4261 {
4262 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4263 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4264 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4265 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4266 }
4267
4268 /* advance */
4269 iPhysExt = paPhysExts[iPhysExt].iNext;
4270 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4271 break;
4272 }
4273
4274 /*
4275 * Add another extent to the list.
4276 */
4277 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4278 if (!pNew)
4279 {
4280 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4281 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4282 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4283 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4284 }
4285 pNew->iNext = iPhysExtStart;
4286 pNew->aidx[0] = iShwPT;
4287 pNew->apte[0] = iPte;
4288 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4289 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4290}
4291
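/*
 * Illustrative sketch, not part of the original source: counting the shadow
 * page table references recorded in a phys ext chain. Only structures and
 * constants used above are assumed; the helper name is made up.
 */
#if 0
static unsigned pgmPoolExampleCountPhysExtRefs(PPGMPOOL pPool, uint16_t iPhysExt)
{
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
    unsigned        cRefs      = 0;
    /* Each extent holds up to RT_ELEMENTS(aidx) entries; extents are chained via iNext. */
    while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX)
    {
        Assert(iPhysExt < pPool->cMaxPhysExts);
        for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
            if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
                cRefs++;
        iPhysExt = paPhysExts[iPhysExt].iNext;
    }
    return cRefs;
}
#endif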
4292
4293/**
4294 * Adds a reference to a guest physical page where extents are in use.
4295 *
4296 * @returns The new tracking data for PGMPAGE.
4297 *
4298 * @param pVM The cross context VM structure.
4299 * @param pPhysPage Pointer to the aPages entry in the ram range.
4300 * @param u16 The ram range flags (top 16-bits).
4301 * @param iShwPT The shadow page table index.
4302 * @param iPte Page table entry
4303 */
4304uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4305{
4306 PGM_LOCK_VOID(pVM);
4307 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4308 {
4309 /*
4310 * Convert to extent list.
4311 */
4312 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4313 uint16_t iPhysExt;
4314 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4315 if (pPhysExt)
4316 {
4317 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4318 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4319 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4320 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4321 pPhysExt->aidx[1] = iShwPT;
4322 pPhysExt->apte[1] = iPte;
4323 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4324 }
4325 else
4326 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4327 }
4328 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4329 {
4330 /*
4331 * Insert into the extent list.
4332 */
4333 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4334 }
4335 else
4336 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4337 PGM_UNLOCK(pVM);
4338 return u16;
4339}
4340
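/*
 * Illustrative usage sketch, not part of the original source: a hypothetical
 * caller adding another shadow PT reference to a guest page and writing the
 * updated tracking word back. PGM_PAGE_GET_TRACKING / PGM_PAGE_SET_TRACKING
 * are the macros used elsewhere in this file; the function name is made up.
 */
#if 0
static void pgmPoolExampleAddRef(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t iShwPT, uint16_t iPte)
{
    uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    u16 = pgmPoolTrackPhysExtAddref(pVM, pPhysPage, u16, iShwPT, iPte);
    PGM_PAGE_SET_TRACKING(pVM, pPhysPage, u16);
}
#endif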
4341
4342/**
4343 * Clear references to guest physical memory.
4344 *
4345 * @param pPool The pool.
4346 * @param pPage The page.
4347 * @param pPhysPage Pointer to the aPages entry in the ram range.
4348 * @param iPte Shadow PTE index
4349 */
4350void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4351{
4352 PVMCC pVM = pPool->CTX_SUFF(pVM);
4353 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4354 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4355
4356 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4357 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4358 {
4359 PGM_LOCK_VOID(pVM);
4360
4361 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4362 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4363 do
4364 {
4365 Assert(iPhysExt < pPool->cMaxPhysExts);
4366
4367 /*
4368 * Look for the shadow page and check if it's all freed.
4369 */
4370 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4371 {
4372 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4373 && paPhysExts[iPhysExt].apte[i] == iPte)
4374 {
4375 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4376 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4377
4378 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4379 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4380 {
4381 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4382 PGM_UNLOCK(pVM);
4383 return;
4384 }
4385
4386 /* we can free the node. */
4387 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4388 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4389 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4390 {
4391 /* lonely node */
4392 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4393 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4394 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4395 }
4396 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4397 {
4398 /* head */
4399 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4400 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4401 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4402 }
4403 else
4404 {
4405 /* in list */
4406 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4407 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4408 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4409 }
4410 iPhysExt = iPhysExtNext;
4411 PGM_UNLOCK(pVM);
4412 return;
4413 }
4414 }
4415
4416 /* next */
4417 iPhysExtPrev = iPhysExt;
4418 iPhysExt = paPhysExts[iPhysExt].iNext;
4419 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4420
4421 PGM_UNLOCK(pVM);
4422 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4423 }
4424 else /* nothing to do */
4425 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4426}
4427
4428/**
4429 * Clear references to guest physical memory.
4430 *
4431 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4432 * physical address is assumed to be correct, so the linear search can be
4433 * skipped and we can assert at an earlier point.
4434 *
4435 * @param pPool The pool.
4436 * @param pPage The page.
4437 * @param HCPhys The host physical address corresponding to the guest page.
4438 * @param GCPhys The guest physical address corresponding to HCPhys.
4439 * @param iPte Shadow PTE index
4440 */
4441static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4442{
4443 /*
4444 * Look up the page and make sure it checks out before dereferencing it.
4445 */
4446 PVMCC pVM = pPool->CTX_SUFF(pVM);
4447 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4448 if (pPhysPage)
4449 {
4450 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4451#ifdef LOG_ENABLED
4452 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4453 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4454#endif
4455 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4456 {
4457 Assert(pPage->cPresent);
4458 Assert(pPool->cPresent);
4459 pPage->cPresent--;
4460 pPool->cPresent--;
4461 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4462 return;
4463 }
4464
4465 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4466 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4467 }
4468 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4469}
4470
4471
4472/**
4473 * Clear references to guest physical memory.
4474 *
4475 * @param pPool The pool.
4476 * @param pPage The page.
4477 * @param HCPhys The host physical address corresponding to the guest page.
4478 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4479 * @param iPte Shadow pte index
4480 */
4481void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4482{
4483 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4484
4485 /*
4486 * Try the hint first.
4487 */
4488 RTHCPHYS HCPhysHinted;
4489 PVMCC pVM = pPool->CTX_SUFF(pVM);
4490 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4491 if (pPhysPage)
4492 {
4493 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4494 Assert(HCPhysHinted);
4495 if (HCPhysHinted == HCPhys)
4496 {
4497 Assert(pPage->cPresent);
4498 Assert(pPool->cPresent);
4499 pPage->cPresent--;
4500 pPool->cPresent--;
4501 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4502 return;
4503 }
4504 }
4505 else
4506 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4507
4508 /*
4509 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4510 */
4511 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4512 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
4513 Assert(pVM->pgm.s.apRamRanges[0] == NULL);
4514 for (uint32_t idx = 1; idx <= idRamRangeMax; idx++)
4515 {
4516 PPGMRAMRANGE const pRam = pVM->CTX_EXPR(pgm, pgmr0, pgm).s.apRamRanges[idx];
4517 AssertContinue(pRam);
4518 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4519 while (iPage-- > 0)
4520 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4521 {
4522 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4523 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4524 Assert(pPage->cPresent);
4525 Assert(pPool->cPresent);
4526 pPage->cPresent--;
4527 pPool->cPresent--;
4528 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4529 return;
4530 }
4531 }
4532
4533 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4534}
4535
4536
4537/**
4538 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4539 *
4540 * @param pPool The pool.
4541 * @param pPage The page.
4542 * @param pShwPT The shadow page table (mapping of the page).
4543 * @param pGstPT The guest page table.
4544 */
4545DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4546{
4547 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4548 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4549 {
4550 const X86PGUINT uPte = pShwPT->a[i].u;
4551 Assert(!(uPte & RT_BIT_32(10)));
4552 if (uPte & X86_PTE_P)
4553 {
4554 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4555 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4556 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4557 if (!pPage->cPresent)
4558 break;
4559 }
4560 }
4561}
4562
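/*
 * Illustrative sketch, not part of the original source: the A20 gate handling
 * used by the deref helpers above and below - when A20 is disabled, bit 20 of
 * the guest physical address is masked off before looking up the page. The
 * helper name is made up.
 */
#if 0
DECLINLINE(RTGCPHYS) pgmPoolExampleApplyA20(bool fA20Enabled, RTGCPHYS GCPhys)
{
    return fA20Enabled ? GCPhys : GCPhys & ~(RTGCPHYS)RT_BIT_64(20);
}
#endif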
4563
4564/**
4565 * Clear references to guest physical memory in a PAE / 32-bit page table.
4566 *
4567 * @param pPool The pool.
4568 * @param pPage The page.
4569 * @param pShwPT The shadow page table (mapping of the page).
4570 * @param pGstPT The guest page table (just a half one).
4571 */
4572DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4573{
4574 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4575 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4576 {
4577 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4578 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4579 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4580 {
4581 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4582 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4583 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4584 if (!pPage->cPresent)
4585 break;
4586 }
4587 }
4588}
4589
4590
4591/**
4592 * Clear references to guest physical memory in a PAE / PAE page table.
4593 *
4594 * @param pPool The pool.
4595 * @param pPage The page.
4596 * @param pShwPT The shadow page table (mapping of the page).
4597 * @param pGstPT The guest page table.
4598 */
4599DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4600{
4601 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4602 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4603 {
4604 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4605 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4606 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4607 {
4608 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4609 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4610 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4611 if (!pPage->cPresent)
4612 break;
4613 }
4614 }
4615}
4616
4617
4618/**
4619 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4620 *
4621 * @param pPool The pool.
4622 * @param pPage The page.
4623 * @param pShwPT The shadow page table (mapping of the page).
4624 */
4625DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4626{
4627 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4628 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4629 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4630 {
4631 const X86PGUINT uPte = pShwPT->a[i].u;
4632 Assert(!(uPte & RT_BIT_32(10)));
4633 if (uPte & X86_PTE_P)
4634 {
4635 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4636 i, uPte & X86_PTE_PG_MASK, GCPhys));
4637 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4638 if (!pPage->cPresent)
4639 break;
4640 }
4641 }
4642}
4643
4644
4645/**
4646 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4647 *
4648 * @param pPool The pool.
4649 * @param pPage The page.
4650 * @param pShwPT The shadow page table (mapping of the page).
4651 */
4652DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4653{
4654 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4655 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4656 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4657 {
4658 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4659 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4660 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4661 {
4662 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4663 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4664 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4665 if (!pPage->cPresent)
4666 break;
4667 }
4668 }
4669}
4670
4671
4672/**
4673 * Clear references to shadowed pages in an EPT page table.
4674 *
4675 * @param pPool The pool.
4676 * @param pPage The page.
4677 * @param pShwPT The shadow page directory pointer table (mapping of the
4678 * page).
4679 */
4680DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4681{
4682 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4683 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4684 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4685 {
4686 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4687 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4688 if (uPte & EPT_E_READ)
4689 {
4690 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4691 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4692 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4693 if (!pPage->cPresent)
4694 break;
4695 }
4696 }
4697}
4698
4699#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4700
4701/**
4702 * Clears references to shadowed pages in a SLAT EPT page table.
4703 *
4704 * @param pPool The pool.
4705 * @param pPage The page.
4706 * @param pShwPT The shadow page table (mapping of the page).
4707 * @param pGstPT The guest page table.
4708 */
4709DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4710{
4711 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4712 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4713 {
4714 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4715 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4716 if (uShwPte & EPT_PRESENT_MASK)
4717 {
4718 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4719 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4720 if (!pPage->cPresent)
4721 break;
4722 }
4723 }
4724}
4725
4726
4727/**
4728 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4729 *
4730 * @param pPool The pool.
4731 * @param pPage The page.
4732 * @param pShwPT The shadow page table (mapping of the page).
4733 */
4734DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4735{
4736 Assert(pPage->fA20Enabled);
4737 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4738 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4739 {
4740 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4741 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4742 if (uShwPte & EPT_PRESENT_MASK)
4743 {
4744 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4745 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4746 if (!pPage->cPresent)
4747 break;
4748 }
4749 }
4750}
4751
4752
4753/**
4754 * Clear references to shadowed pages in a SLAT EPT page directory.
4755 *
4756 * @param pPool The pool.
4757 * @param pPage The page.
4758 * @param pShwPD The shadow page directory (mapping of the page).
4759 * @param pGstPD The guest page directory.
4760 */
4761DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4762{
4763 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4764 {
4765 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4766#ifdef PGM_WITH_LARGE_PAGES
4767 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4768#else
4769 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4770#endif
4771 if (uPde & EPT_PRESENT_MASK)
4772 {
4773#ifdef PGM_WITH_LARGE_PAGES
4774 if (uPde & EPT_E_LEAF)
4775 {
4776 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4777 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4778 }
4779 else
4780#endif
4781 {
4782 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4783 if (pSubPage)
4784 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4785 else
4786 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4787 }
4788 }
4789 }
4790}
4791
4792
4793/**
4794 * Clear references to shadowed pages in a SLAT EPT PML4 table.
4795 *
4796 * @param pPool The pool.
4797 * @param pPage The page.
4798 * @param pShwPml4 The shadow PML4 table.
4799 */
4800DECLINLINE(void) pgmPoolTrackDerefNestedPML4(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPML4 pShwPml4)
4801{
4802 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4803 for (unsigned i = 0; i < RT_ELEMENTS(pShwPml4->a); i++)
4804 {
4805 X86PGPAEUINT const uPml4e = pShwPml4->a[i].u;
4806 AssertMsg((uPml4e & (EPT_PML4E_MBZ_MASK | 0xfff0000000000f00)) == 0, ("uPml4e=%RX64\n", uPml4e));
4807 if (uPml4e & EPT_PRESENT_MASK)
4808 {
4809 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & EPT_PML4E_PG_MASK);
4810 if (pSubPage)
4811 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4812 else
4813 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4814 }
4815 }
4816}
4817#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4818
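/*
 * All the pgmPoolTrackDeref* workers in this area follow the same pattern:
 * leaf entries that map guest memory go through pgmPoolTracDerefGCPhys() so
 * the guest physical tracking reference is dropped, while entries pointing at
 * other shadow pool pages are resolved via the HCPhysTree AVL lookup and
 * released with pgmPoolTrackFreeUser().  Only the page table level workers use
 * iFirstPresent/cPresent to cut the scan short; the PD/PDPT/PML4 workers walk
 * every entry.
 */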
4819
4820/**
4821 * Clear references to shadowed pages in a 32-bit page directory.
4822 *
4823 * @param pPool The pool.
4824 * @param pPage The page.
4825 * @param pShwPD The shadow page directory (mapping of the page).
4826 */
4827DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4828{
4829 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4830 {
4831 X86PGUINT const uPde = pShwPD->a[i].u;
4832 if (uPde & X86_PDE_P)
4833 {
4834 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4835 if (pSubPage)
4836 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4837 else
4838 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4839 }
4840 }
4841}
4842
4843
4844/**
4845 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4846 *
4847 * @param pPool The pool.
4848 * @param pPage The page.
4849 * @param pShwPD The shadow page directory (mapping of the page).
4850 */
4851DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4852{
4853 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4854 {
4855 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4856 if (uPde & X86_PDE_P)
4857 {
4858#ifdef PGM_WITH_LARGE_PAGES
4859 if (uPde & X86_PDE_PS)
4860 {
4861 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4862 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4863 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4864 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4865 i);
4866 }
4867 else
4868#endif
4869 {
4870 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4871 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4872 if (pSubPage)
4873 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4874 else
4875 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4876 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4877 }
4878 }
4879 }
4880}
4881
4882
4883/**
4884 * Clear references to shadowed pages in a PAE page directory pointer table.
4885 *
4886 * @param pPool The pool.
4887 * @param pPage The page.
4888 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4889 */
4890DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4891{
4892 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4893 {
4894 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4895 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4896 if (uPdpe & X86_PDPE_P)
4897 {
4898 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4899 if (pSubPage)
4900 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4901 else
4902 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4903 }
4904 }
4905}
4906
4907
4908/**
4909 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4910 *
4911 * @param pPool The pool.
4912 * @param pPage The page.
4913 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4914 */
4915DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4916{
4917 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4918 {
4919 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4920 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4921 if (uPdpe & X86_PDPE_P)
4922 {
4923 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4924 if (pSubPage)
4925 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4926 else
4927 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4928 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4929 }
4930 }
4931}
4932
4933
4934/**
4935 * Clear references to shadowed pages in a 64-bit level 4 page table.
4936 *
4937 * @param pPool The pool.
4938 * @param pPage The page.
4939 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4940 */
4941DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4942{
4943 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4944 {
4945 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4946 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4947 if (uPml4e & X86_PML4E_P)
4948 {
4949 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4950 if (pSubPage)
4951 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4952 else
4953 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4954 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4955 }
4956 }
4957}
4958
4959
4960/**
4961 * Clear references to shadowed pages in an EPT page directory.
4962 *
4963 * @param pPool The pool.
4964 * @param pPage The page.
4965 * @param pShwPD The shadow page directory (mapping of the page).
4966 */
4967DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4968{
4969 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4970 {
4971 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4972#ifdef PGM_WITH_LARGE_PAGES
4973 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4974#else
4975 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4976#endif
4977 if (uPde & EPT_E_READ)
4978 {
4979#ifdef PGM_WITH_LARGE_PAGES
4980 if (uPde & EPT_E_LEAF)
4981 {
4982 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4983 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4984 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4985 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4986 i);
4987 }
4988 else
4989#endif
4990 {
4991 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4992 if (pSubPage)
4993 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4994 else
4995 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4996 }
4997 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4998 }
4999 }
5000}
5001
5002
5003/**
5004 * Clear references to shadowed pages in an EPT page directory pointer table.
5005 *
5006 * @param pPool The pool.
5007 * @param pPage The page.
5008 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
5009 */
5010DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
5011{
5012 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
5013 {
5014 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
5015 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
5016 if (uPdpe & EPT_E_READ)
5017 {
5018 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
5019 if (pSubPage)
5020 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
5021 else
5022 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
5023 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
5024 }
5025 }
5026}
5027
5028
5029/**
5030 * Clears all references made by this page.
5031 *
5032 * This includes other shadow pages and GC physical addresses.
5033 *
5034 * @param pPool The pool.
5035 * @param pPage The page.
5036 */
5037static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
5038{
5039 /*
5040 * Map the shadow page and take action according to the page kind.
5041 */
5042 PVMCC pVM = pPool->CTX_SUFF(pVM);
5043 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5044 switch (pPage->enmKind)
5045 {
5046 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5047 {
5048 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5049 void *pvGst;
5050 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5051 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5052 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5053 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5054 break;
5055 }
5056
5057 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5058 {
5059 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5060 void *pvGst;
5061 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5062 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5063 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5064 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5065 break;
5066 }
5067
5068 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5069 {
5070 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5071 void *pvGst;
5072 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5073 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5074 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5075 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5076 break;
5077 }
5078
5079 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5080 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5081 {
5082 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5083 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5084 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5085 break;
5086 }
5087
5088 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5089 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5090 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5091 {
5092 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5093 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5094 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5095 break;
5096 }
5097
5098 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5099 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5100 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5101 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5102 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5103 case PGMPOOLKIND_PAE_PD_PHYS:
5104 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5105 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5106 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5107 break;
5108
5109 case PGMPOOLKIND_32BIT_PD_PHYS:
5110 case PGMPOOLKIND_32BIT_PD:
5111 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5112 break;
5113
5114 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5115 case PGMPOOLKIND_PAE_PDPT:
5116 case PGMPOOLKIND_PAE_PDPT_PHYS:
5117 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5118 break;
5119
5120 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5121 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5122 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5123 break;
5124
5125 case PGMPOOLKIND_64BIT_PML4:
5126 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5127 break;
5128
5129 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5130 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5131 break;
5132
5133 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5134 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5135 break;
5136
5137 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5138 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5139 break;
5140
5141#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5142 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5143 {
5144 void *pvGst;
5145 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5146 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5147 break;
5148 }
5149
5150 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5151 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5152 break;
5153
5154 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5155 {
5156 void *pvGst;
5157 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5158 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5159 break;
5160 }
5161
5162 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5163 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5164 break;
5165
5166 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5167 pgmPoolTrackDerefNestedPML4(pPool, pPage, (PEPTPML4)pvShw);
5168 break;
5169#endif
5170
5171 default:
5172 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5173 }
5174
5175 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
5176 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5177 RT_BZERO(pvShw, PAGE_SIZE);
5178 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5179 pPage->fZeroed = true;
5180 Assert(!pPage->cPresent);
5181 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5182}
5183
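/*
 * Note that pgmPoolTrackDeref() zeroes the shadow page and sets fZeroed on its
 * way out, which is what allows pgmPoolAlloc() further down to skip the
 * RT_BZERO() when the page is handed out again.
 */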
5184
5185/**
5186 * Flushes a pool page.
5187 *
5188 * This moves the page to the free list after removing all user references to it.
5189 *
5190 * @returns VBox status code.
5191 * @retval VINF_SUCCESS on success.
5192 * @param pPool The pool.
5193 * @param pPage The shadow page.
5194 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
5195 */
5196int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5197{
5198 PVMCC pVM = pPool->CTX_SUFF(pVM);
5199 bool fFlushRequired = false;
5200
5201 int rc = VINF_SUCCESS;
5202 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5203 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5204 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5205
5206 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5207 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5208 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5209
5210 /*
5211 * Reject any attempts at flushing any of the special root pages (shall
5212 * not happen).
5213 */
5214 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5215 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5216 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5217 VINF_SUCCESS);
5218
5219 PGM_LOCK_VOID(pVM);
5220
5221 /*
5222 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5223 */
5224 if (pgmPoolIsPageLocked(pPage))
5225 {
5226#if !defined(VBOX_VMM_TARGET_ARMV8)
5227 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5228 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5229 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5230 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5231 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5232 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5233 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5234 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5235 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5236 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5237 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5238#endif
5239 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5240 PGM_UNLOCK(pVM);
5241 return VINF_SUCCESS;
5242 }
5243
5244 /*
5245 * Mark the page as being in need of zeroing.
5246 */
5247 pPage->fZeroed = false;
5248
5249#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5250 if (pPage->fDirty)
5251 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5252#endif
5253
5254 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5255 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5256 fFlushRequired = true;
5257
5258 /*
5259 * Clear the page.
5260 */
5261 pgmPoolTrackClearPageUsers(pPool, pPage);
5262 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5263 pgmPoolTrackDeref(pPool, pPage);
5264 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5265
5266 /*
5267 * Flush it from the cache.
5268 */
5269 pgmPoolCacheFlushPage(pPool, pPage);
5270
5271 /*
5272 * Deregister the monitoring.
5273 */
5274 if (pPage->fMonitored)
5275 rc = pgmPoolMonitorFlush(pPool, pPage);
5276
5277 /*
5278 * Free the page.
5279 */
5280 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5281 pPage->iNext = pPool->iFreeHead;
5282 pPool->iFreeHead = pPage->idx;
5283 pPage->enmKind = PGMPOOLKIND_FREE;
5284 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5285 pPage->GCPhys = NIL_RTGCPHYS;
5286 pPage->fReusedFlushPending = false;
5287
5288 pPool->cUsedPages--;
5289
5290 /* Flush the TLBs of all VCPUs if required. */
5291 if ( fFlushRequired
5292 && fFlush)
5293 {
5294 PGM_INVL_ALL_VCPU_TLBS(pVM);
5295 }
5296
5297 PGM_UNLOCK(pVM);
5298 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5299 return rc;
5300}
5301
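/*
 * The fFlush parameter only suppresses the PGM_INVL_ALL_VCPU_TLBS() call at
 * the end of pgmPoolFlushPage(); the page is torn down either way.  A caller
 * passing false is presumably flushing a whole batch of pages and issuing a
 * single TLB flush itself afterwards, roughly along these lines (illustrative
 * sketch only, not an actual caller):
 *
 *      while (fMorePagesToDrop)
 *          pgmPoolFlushPage(pPool, pNextPageToDrop, false);
 *      PGM_INVL_ALL_VCPU_TLBS(pVM);
 */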
5302
5303/**
5304 * Frees a usage of a pool page.
5305 *
5306 * The caller is responsible for updating the user table so that it no longer
5307 * references the shadow page.
5308 *
5309 * @param pPool The pool.
5310 * @param pPage The shadow page.
5311 * @param iUser The shadow page pool index of the user table.
5312 * NIL_PGMPOOL_IDX for root pages.
5313 * @param iUserTable The index into the user table (shadowed). Ignored if
5314 * root page.
5315 */
5316void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5317{
5318 PVMCC pVM = pPool->CTX_SUFF(pVM);
5319
5320 STAM_PROFILE_START(&pPool->StatFree, a);
5321 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5322 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5323 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5324
5325 PGM_LOCK_VOID(pVM);
5326 if (iUser != NIL_PGMPOOL_IDX)
5327 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5328 if (!pPage->fCached)
5329 pgmPoolFlushPage(pPool, pPage);
5330 PGM_UNLOCK(pVM);
5331 STAM_PROFILE_STOP(&pPool->StatFree, a);
5332}
5333
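/*
 * Cached pages are deliberately not flushed by pgmPoolFreeByPage(): for those
 * only the user link is removed and the page stays in the pool so the cache
 * can hand it out again, whereas uncached pages go straight back onto the free
 * list via pgmPoolFlushPage().
 */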
5334
5335/**
5336 * Makes one or more pages free, growing the pool or evicting a cached page as needed.
5337 *
5338 * @returns VBox status code.
5339 * @retval VINF_SUCCESS on success.
5340 *
5341 * @param pPool The pool.
5342 * @param enmKind Page table kind
5343 * @param iUser The user of the page.
5344 */
5345static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5346{
5347 PVMCC pVM = pPool->CTX_SUFF(pVM);
5348 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5349 NOREF(enmKind);
5350
5351 /*
5352 * If the pool isn't full grown yet, expand it.
5353 */
5354 if (pPool->cCurPages < pPool->cMaxPages)
5355 {
5356 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5357#ifdef IN_RING3
5358 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5359#else
5360 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5361#endif
5362 if (RT_FAILURE(rc))
5363 return rc;
5364 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5365 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5366 return VINF_SUCCESS;
5367 }
5368
5369 /*
5370 * Free one cached page.
5371 */
5372 return pgmPoolCacheFreeOne(pPool, iUser);
5373}
5374
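/*
 * The growth path above differs by context: ring-3 grows the pool via
 * PGMR3PoolGrow() and ring-0 via PGMR0PoolGrow().  Only when the pool has
 * already reached cMaxPages, or growing did not produce a free page, does the
 * function fall back to evicting a cached page with pgmPoolCacheFreeOne().
 */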
5375
5376/**
5377 * Allocates a page from the pool.
5378 *
5379 * This page may actually be a cached page and not in need of any processing
5380 * on the callers part.
5381 *
5382 * @returns VBox status code.
5383 * @retval VINF_SUCCESS if a NEW page was allocated.
5384 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5385 *
5386 * @param pVM The cross context VM structure.
5387 * @param GCPhys The GC physical address of the page we're going to shadow.
5388 * For 4MB and 2MB PD entries, it's the first address the
5389 * shadow PT is covering.
5390 * @param enmKind The kind of mapping.
5391 * @param enmAccess Access type for the mapping (only relevant for big pages)
5392 * @param fA20Enabled Whether the A20 gate is enabled or not.
5393 * @param iUser The shadow page pool index of the user table. Root
5394 * pages should pass NIL_PGMPOOL_IDX.
5395 * @param iUserTable The index into the user table (shadowed). Ignored for
5396 * root pages (iUser == NIL_PGMPOOL_IDX).
5397 * @param fLockPage Lock the page
5398 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5399 */
5400int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5401 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5402{
5403 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5404 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5405 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5406 *ppPage = NULL;
5407 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5408 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5409 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5410
5411#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5412 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5413 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5414#endif
5415
5416 PGM_LOCK_VOID(pVM);
5417
5418 if (pPool->fCacheEnabled)
5419 {
5420 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5421 if (RT_SUCCESS(rc2))
5422 {
5423 if (fLockPage)
5424 pgmPoolLockPage(pPool, *ppPage);
5425 PGM_UNLOCK(pVM);
5426 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5427 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5428 return rc2;
5429 }
5430 }
5431
5432 /*
5433 * Allocate a new one.
5434 */
5435 int rc = VINF_SUCCESS;
5436 uint16_t iNew = pPool->iFreeHead;
5437 if (iNew == NIL_PGMPOOL_IDX)
5438 {
5439 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5440 if (RT_FAILURE(rc))
5441 {
5442 PGM_UNLOCK(pVM);
5443 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5444 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5445 return rc;
5446 }
5447 iNew = pPool->iFreeHead;
5448 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5449 }
5450
5451 /* unlink the free head */
5452 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5453 pPool->iFreeHead = pPage->iNext;
5454 pPage->iNext = NIL_PGMPOOL_IDX;
5455
5456 /*
5457 * Initialize it.
5458 */
5459 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5460 pPage->enmKind = enmKind;
5461 pPage->enmAccess = enmAccess;
5462 pPage->GCPhys = GCPhys;
5463 pPage->fA20Enabled = fA20Enabled;
5464 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5465 pPage->fMonitored = false;
5466 pPage->fCached = false;
5467 pPage->fDirty = false;
5468 pPage->fReusedFlushPending = false;
5469 pPage->cModifications = 0;
5470 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5471 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5472 pPage->cPresent = 0;
5473 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5474 pPage->idxDirtyEntry = 0;
5475 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5476 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5477 pPage->cLastAccessHandler = 0;
5478 pPage->cLocked = 0;
5479# ifdef VBOX_STRICT
5480 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5481# endif
5482
5483 /*
5484 * Insert into the tracking and cache. If this fails, free the page.
5485 */
5486 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5487 if (RT_FAILURE(rc3))
5488 {
5489 pPool->cUsedPages--;
5490 pPage->enmKind = PGMPOOLKIND_FREE;
5491 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5492 pPage->GCPhys = NIL_RTGCPHYS;
5493 pPage->iNext = pPool->iFreeHead;
5494 pPool->iFreeHead = pPage->idx;
5495 PGM_UNLOCK(pVM);
5496 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5497 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5498 return rc3;
5499 }
5500
5501 /*
5502 * Commit the allocation, clear the page and return.
5503 */
5504#ifdef VBOX_WITH_STATISTICS
5505 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5506 pPool->cUsedPagesHigh = pPool->cUsedPages;
5507#endif
5508
5509 if (!pPage->fZeroed)
5510 {
5511 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5512 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5513 RT_BZERO(pv, PAGE_SIZE);
5514 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5515 }
5516
5517 *ppPage = pPage;
5518 if (fLockPage)
5519 pgmPoolLockPage(pPool, pPage);
5520 PGM_UNLOCK(pVM);
5521 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5522 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5523 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5524 return rc;
5525}
5526
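/*
 * Rough usage sketch for pgmPoolAlloc() (hypothetical caller, not lifted from
 * the real shadow paging code); iUserPd/iPdIndex stand in for the shadow PD
 * and PD entry that will reference the new page:
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAlloc(pVM, GCPhysGstPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                            PGMPOOLACCESS_DONTCARE, true,    // fA20Enabled
 *                            iUserPd, iPdIndex, false,        // fLockPage
 *                            &pShwPage);
 *      if (rc == VINF_PGM_CACHED_PAGE)
 *          Log(("cache hit - shadow PT content is already valid\n"));
 *      else if (RT_SUCCESS(rc))
 *          Log(("fresh zeroed page - caller fills in the shadow entries\n"));
 *
 * The matching release goes through pgmPoolFreeByPage() (or the pgmPoolFree()
 * wrapper below) with the same iUser/iUserTable pair.
 */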
5527
5528/**
5529 * Frees a usage of a pool page.
5530 *
5531 * @param pVM The cross context VM structure.
5532 * @param HCPhys The HC physical address of the shadow page.
5533 * @param iUser The shadow page pool index of the user table.
5534 * NIL_PGMPOOL_IDX if root page.
5535 * @param iUserTable The index into the user table (shadowed). Ignored if
5536 * root page.
5537 */
5538void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5539{
5540 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5541 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5542 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5543}
5544
5545
5546/**
5547 * Internal worker for finding an 'in-use' shadow page given its physical address.
5548 *
5549 * @returns Pointer to the shadow page structure.
5550 * @param pPool The pool.
5551 * @param HCPhys The HC physical address of the shadow page.
5552 */
5553PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5554{
5555 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5556
5557 /*
5558 * Look up the page.
5559 */
5560 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5561
5562 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5563 return pPage;
5564}
5565
5566
5567/**
5568 * Internal worker for finding a page for debugging purposes, no assertions.
5569 *
5570 * @returns Pointer to the shadow page structure. NULL if not found.
5571 * @param pPool The pool.
5572 * @param HCPhys The HC physical address of the shadow page.
5573 */
5574PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5575{
5576 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5577 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5578}
5579
5580
5581/**
5582 * Internal worker for PGM_HCPHYS_2_PTR.
5583 *
5584 * @returns VBox status code.
5585 * @param pVM The cross context VM structure.
5586 * @param HCPhys The HC physical address of the shadow page.
5587 * @param ppv Where to return the address.
5588 */
5589int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5590{
5591 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5592 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5593 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5594 VERR_PGM_POOL_GET_PAGE_FAILED);
5595 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5596 return VINF_SUCCESS;
5597}
5598
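/*
 * All three lookups above key the HCPhysTree AVL tree on the page aligned host
 * physical address (HCPhys & X86_PTE_PAE_PG_MASK).  pgmPoolGetPage() asserts
 * fatally on a miss, pgmPoolHCPhys2Ptr() fails with
 * VERR_PGM_POOL_GET_PAGE_FAILED, and only the debug variant quietly returns
 * NULL; pgmPoolHCPhys2Ptr() additionally adds the page offset back onto the
 * ring context mapping it returns.
 */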
5599#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5600
5601/**
5602 * Flushes the specified page if present.
5603 *
5604 * @param pVM The cross context VM structure.
5605 * @param GCPhys Guest physical address of the page to flush
5606 */
5607void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5608{
5609 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5610
5611 VM_ASSERT_EMT(pVM);
5612
5613 /*
5614 * Look up the GCPhys in the hash.
5615 */
5616 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5617 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5618 if (i == NIL_PGMPOOL_IDX)
5619 return;
5620
5621 do
5622 {
5623 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5624 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5625 {
5626 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5627 switch (pPage->enmKind)
5628 {
5629 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5630 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5631 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5632 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5633 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5634 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5635 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5636 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5637 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5638 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5639 case PGMPOOLKIND_64BIT_PML4:
5640 case PGMPOOLKIND_32BIT_PD:
5641 case PGMPOOLKIND_PAE_PDPT:
5642 {
5643 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5644# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5645 if (pPage->fDirty)
5646 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5647 else
5648# endif
5649 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5650 Assert(!pgmPoolIsPageLocked(pPage));
5651 pgmPoolMonitorChainFlush(pPool, pPage);
5652 return;
5653 }
5654
5655 /* ignore, no monitoring. */
5656 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5657 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5658 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5659 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5660 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5661 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5662 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5663 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5664 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5665 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5666 case PGMPOOLKIND_ROOT_NESTED:
5667 case PGMPOOLKIND_PAE_PD_PHYS:
5668 case PGMPOOLKIND_PAE_PDPT_PHYS:
5669 case PGMPOOLKIND_32BIT_PD_PHYS:
5670 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5671 break;
5672
5673 default:
5674 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5675 }
5676 }
5677
5678 /* next */
5679 i = pPage->iNext;
5680 } while (i != NIL_PGMPOOL_IDX);
5681 return;
5682}
5683
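/*
 * The walk above uses the GCPhys hash: PGMPOOL_HASH(GCPhys) picks the chain
 * head in aiHash[] and iNext links all pool pages hashing to the same slot.
 * Only the monitored page kinds are flushed via pgmPoolMonitorChainFlush();
 * the *_PHYS and ROOT_NESTED kinds are skipped since they are not monitored.
 */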
5684
5685/**
5686 * Resets the CPU on hot plugging.
5687 *
5688 * @param pVM The cross context VM structure.
5689 * @param pVCpu The cross context virtual CPU structure.
5690 */
5691void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5692{
5693 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5694
5695 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5696 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5697 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5698}
5699
5700
5701/**
5702 * Flushes the entire cache.
5703 *
5704 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5705 * this and will execute this CR3 flush.
5706 *
5707 * @param pVM The cross context VM structure.
5708 */
5709void pgmR3PoolReset(PVM pVM)
5710{
5711 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5712
5713 PGM_LOCK_ASSERT_OWNER(pVM);
5714 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5715 LogFlow(("pgmR3PoolReset:\n"));
5716
5717 /*
5718 * If there are no pages in the pool, there is nothing to do.
5719 */
5720 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5721 {
5722 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5723 return;
5724 }
5725
5726 /*
5727 * Exit the shadow mode since we're going to clear everything,
5728 * including the root page.
5729 */
5730 VMCC_FOR_EACH_VMCPU(pVM)
5731 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5732 VMCC_FOR_EACH_VMCPU_END(pVM);
5733
5734
5735 /*
5736 * Nuke the free list and reinsert all pages into it.
5737 */
5738 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5739 {
5740 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5741
5742 if (pPage->fMonitored)
5743 pgmPoolMonitorFlush(pPool, pPage);
5744 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5745 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5746 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5747 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5748 pPage->GCPhys = NIL_RTGCPHYS;
5749 pPage->enmKind = PGMPOOLKIND_FREE;
5750 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5751 Assert(pPage->idx == i);
5752 pPage->iNext = i + 1;
5753 pPage->fA20Enabled = true;
5754 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5755 pPage->fSeenNonGlobal = false;
5756 pPage->fMonitored = false;
5757 pPage->fDirty = false;
5758 pPage->fCached = false;
5759 pPage->fReusedFlushPending = false;
5760 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5761 pPage->cPresent = 0;
5762 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5763 pPage->cModifications = 0;
5764 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5765 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5766 pPage->idxDirtyEntry = 0;
5767 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5768 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5769 pPage->cLastAccessHandler = 0;
5770 pPage->cLocked = 0;
5771# ifdef VBOX_STRICT
5772 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5773# endif
5774 }
5775 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5776 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5777 pPool->cUsedPages = 0;
5778
5779 /*
5780 * Zap and reinitialize the user records.
5781 */
5782 pPool->cPresent = 0;
5783 pPool->iUserFreeHead = 0;
5784 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5785 const unsigned cMaxUsers = pPool->cMaxUsers;
5786 for (unsigned i = 0; i < cMaxUsers; i++)
5787 {
5788 paUsers[i].iNext = i + 1;
5789 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5790 paUsers[i].iUserTable = 0xfffffffe;
5791 }
5792 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5793
5794 /*
5795 * Clear all the GCPhys links and rebuild the phys ext free list.
5796 */
5797 uint32_t const idRamRangeMax = RT_MIN(pVM->pgm.s.idRamRangeMax, RT_ELEMENTS(pVM->pgm.s.apRamRanges) - 1U);
5798 Assert(pVM->pgm.s.apRamRanges[0] == NULL);
5799 for (uint32_t idx = 1; idx <= idRamRangeMax; idx++)
5800 {
5801 PPGMRAMRANGE const pRam = pVM->CTX_EXPR(pgm, pgmr0, pgm).s.apRamRanges[idx];
5802 AssertContinue(pRam);
5803 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5804 while (iPage-- > 0)
5805 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5806 }
5807
5808 pPool->iPhysExtFreeHead = 0;
5809 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5810 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5811 for (unsigned i = 0; i < cMaxPhysExts; i++)
5812 {
5813 paPhysExts[i].iNext = i + 1;
5814 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5815 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5816 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5817 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5818 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5819 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5820 }
5821 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5822
5823 /*
5824 * Just zap the modified list.
5825 */
5826 pPool->cModifiedPages = 0;
5827 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5828
5829 /*
5830 * Clear the GCPhys hash and the age list.
5831 */
5832 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5833 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5834 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5835 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5836
5837# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5838 /* Clear all dirty pages. */
5839 pPool->idxFreeDirtyPage = 0;
5840 pPool->cDirtyPages = 0;
5841 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5842 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5843# endif
5844
5845 /*
5846 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5847 */
5848 VMCC_FOR_EACH_VMCPU(pVM)
5849 {
5850 /*
5851 * Re-enter the shadowing mode and assert Sync CR3 FF.
5852 */
5853 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5854 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5855 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5856 }
5857 VMCC_FOR_EACH_VMCPU_END(pVM);
5858
5859 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5860}
5861
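/*
 * The reinsertion loop in pgmR3PoolReset() stops at PGMPOOL_IDX_FIRST, so any
 * special root pages below that index are left untouched, matching the check
 * in pgmPoolFlushPage() that refuses to flush them.
 */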
5862#endif /* IN_RING3 */
5863
5864#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5865/**
5866 * Stringifies a PGMPOOLKIND value.
5867 */
5868static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5869{
5870 switch ((PGMPOOLKIND)enmKind)
5871 {
5872 case PGMPOOLKIND_INVALID:
5873 return "PGMPOOLKIND_INVALID";
5874 case PGMPOOLKIND_FREE:
5875 return "PGMPOOLKIND_FREE";
5876 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5877 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5879 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5881 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5882 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5883 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5885 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5886 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5887 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5888 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5889 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5890 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5891 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5892 case PGMPOOLKIND_32BIT_PD:
5893 return "PGMPOOLKIND_32BIT_PD";
5894 case PGMPOOLKIND_32BIT_PD_PHYS:
5895 return "PGMPOOLKIND_32BIT_PD_PHYS";
5896 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5897 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5898 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5899 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5900 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5901 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5902 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5903 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5904 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5905 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5906 case PGMPOOLKIND_PAE_PD_PHYS:
5907 return "PGMPOOLKIND_PAE_PD_PHYS";
5908 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5909 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5910 case PGMPOOLKIND_PAE_PDPT:
5911 return "PGMPOOLKIND_PAE_PDPT";
5912 case PGMPOOLKIND_PAE_PDPT_PHYS:
5913 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5914 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5915 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5916 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5917 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5918 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5919 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5920 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5921 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5922 case PGMPOOLKIND_64BIT_PML4:
5923 return "PGMPOOLKIND_64BIT_PML4";
5924 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5925 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5926 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5927 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5928 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5929 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5930 case PGMPOOLKIND_ROOT_NESTED:
5931 return "PGMPOOLKIND_ROOT_NESTED";
5932 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5933 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5934 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5935 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5936 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5937 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5938 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5939 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5940 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5941 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5942 }
5943 return "Unknown kind!";
5944}
5945#endif /* LOG_ENABLED || VBOX_STRICT */
5946