VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 99132

Last change on this file since 99132 was 99132, checked in by vboxsync, 21 months ago

VMM: Nested VMX: bugref:10318 PGM fixes for supporting Hyper-V in a VM using hardware-assisted execution.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 224.6 KB
Line 
1/* $Id: PGMAllPool.cpp 99132 2023-03-23 09:00:20Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM_POOL
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/vmm/pgm.h>
35#include <VBox/vmm/mm.h>
36#include <VBox/vmm/em.h>
37#include <VBox/vmm/cpum.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include "PGMInline.h"
41#include <VBox/disopcode.h>
42#include <VBox/vmm/hm_vmx.h>
43
44#include <VBox/log.h>
45#include <VBox/err.h>
46#include <iprt/asm.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Internal Functions *
52*********************************************************************************************************************************/
53RT_C_DECLS_BEGIN
54#if 0 /* unused */
55DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
56DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
57#endif /* unused */
58static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
59static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
61static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
62#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
63static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
64#endif
65#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
66static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
67#endif
68
69int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
70PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
71void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
72void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);
73
74RT_C_DECLS_END
75
76
77#if 0 /* unused */
78/**
79 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
80 *
81 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
82 * @param enmKind The page kind.
83 */
84DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
85{
86 switch (enmKind)
87 {
88 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
89 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
91 return true;
92 default:
93 return false;
94 }
95}
96#endif /* unused */
97
98
99/**
100 * Flushes a chain of pages sharing the same access monitor.
101 *
102 * @param pPool The pool.
103 * @param pPage A page in the chain.
104 */
105void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
106{
107 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
108
109 /*
110 * Find the list head.
111 */
112 uint16_t idx = pPage->idx;
113 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
114 {
115 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
116 {
117 idx = pPage->iMonitoredPrev;
118 Assert(idx != pPage->idx);
119 pPage = &pPool->aPages[idx];
120 }
121 }
122
123 /*
124 * Iterate the list flushing each shadow page.
125 */
126 for (;;)
127 {
128 idx = pPage->iMonitoredNext;
129 Assert(idx != pPage->idx);
130 if (pPage->idx >= PGMPOOL_IDX_FIRST)
131 {
132 int rc2 = pgmPoolFlushPage(pPool, pPage);
133 AssertRC(rc2);
134 }
135 /* next */
136 if (idx == NIL_PGMPOOL_IDX)
137 break;
138 pPage = &pPool->aPages[idx];
139 }
140}
141
142
143/**
144 * Wrapper for getting the current context pointer to the entry being modified.
145 *
146 * @returns VBox status code suitable for scheduling.
147 * @param pVM The cross context VM structure.
148 * @param pvDst Destination address
149 * @param pvSrc Pointer to the mapping of @a GCPhysSrc or NULL depending
150 * on the context (e.g. \#PF in R0 & RC).
151 * @param GCPhysSrc The source guest physical address.
152 * @param cb Size of data to read
153 */
154DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
155{
156#if defined(IN_RING3)
157 NOREF(pVM); NOREF(GCPhysSrc);
158 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
159 return VINF_SUCCESS;
160#else
161 /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
162 NOREF(pvSrc);
163 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
164#endif
165}
166
167
168/**
169 * Process shadow entries before they are changed by the guest.
170 *
171 * For PT entries we will clear them. For PD entries, we'll simply check
172 * for mapping conflicts and set the SyncCR3 FF if found.
173 *
174 * @param pVCpu The cross context virtual CPU structure.
175 * @param pPool The pool.
176 * @param pPage The head page.
177 * @param GCPhysFault The guest physical fault address.
178 * @param pvAddress Pointer to the mapping of @a GCPhysFault or NULL
179 * depending on the context (e.g. \#PF in R0 & RC).
180 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
181 */
182static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
183 void const *pvAddress, unsigned cbWrite)
184{
185 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
186 const unsigned off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
187 PVMCC pVM = pPool->CTX_SUFF(pVM);
188 NOREF(pVCpu);
189
190 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
191 (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
192
193 if (PGMPOOL_PAGE_IS_NESTED(pPage))
194 Log7Func(("%RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));
195
196 for (;;)
197 {
198 union
199 {
200 void *pv;
201 PX86PT pPT;
202 PPGMSHWPTPAE pPTPae;
203 PX86PD pPD;
204 PX86PDPAE pPDPae;
205 PX86PDPT pPDPT;
206 PX86PML4 pPML4;
207#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
208 PEPTPDPT pEptPdpt;
209 PEPTPD pEptPd;
210 PEPTPT pEptPt;
211#endif
212 } uShw;
213
214 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
215 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
216
217 uShw.pv = NULL;
218 switch (pPage->enmKind)
219 {
220 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
221 {
222 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
223 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
224 const unsigned iShw = off / sizeof(X86PTE);
225 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
226 X86PGUINT const uPde = uShw.pPT->a[iShw].u;
227 if (uPde & X86_PTE_P)
228 {
229 X86PTE GstPte;
230 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
231 AssertRC(rc);
232 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
233 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
234 ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
235 }
236 break;
237 }
238
239 /* page/2 sized */
240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
241 {
242 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
243 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
244 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
245 {
246 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
247 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
248 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
249 {
250 X86PTE GstPte;
251 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
252 AssertRC(rc);
253
254 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
255 pgmPoolTracDerefGCPhysHint(pPool, pPage,
256 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
257 GstPte.u & X86_PTE_PG_MASK,
258 iShw);
259 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
260 }
261 }
262 break;
263 }
264
265 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
266 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
267 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
268 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
269 {
270 unsigned iGst = off / sizeof(X86PDE);
271 unsigned iShwPdpt = iGst / 256;
272 unsigned iShw = (iGst % 256) * 2;
273 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
274
275 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
276 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
277 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
278 {
279 for (unsigned i = 0; i < 2; i++)
280 {
281 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
282 if (uPde & X86_PDE_P)
283 {
284 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
285 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
286 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
287 }
288
289 /* paranoia / a bit assumptive. */
290 if ( (off & 3)
291 && (off & 3) + cbWrite > 4)
292 {
293 const unsigned iShw2 = iShw + 2 + i;
294 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
295 {
296 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
297 if (uPde2 & X86_PDE_P)
298 {
299 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
300 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
301 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
302 }
303 }
304 }
305 }
306 }
307 break;
308 }
309
310 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
311 {
312 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
313 const unsigned iShw = off / sizeof(X86PTEPAE);
314 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
315 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
316 {
317 X86PTEPAE GstPte;
318 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
319 AssertRC(rc);
320
321 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
322 pgmPoolTracDerefGCPhysHint(pPool, pPage,
323 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
324 GstPte.u & X86_PTE_PAE_PG_MASK,
325 iShw);
326 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
327 }
328
329 /* paranoia / a bit assumptive. */
330 if ( (off & 7)
331 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
332 {
333 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
334 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
335
336 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
337 {
338 X86PTEPAE GstPte;
339 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
340 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
341 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
342 AssertRC(rc);
343 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
344 pgmPoolTracDerefGCPhysHint(pPool, pPage,
345 PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
346 GstPte.u & X86_PTE_PAE_PG_MASK,
347 iShw2);
348 PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
349 }
350 }
351 break;
352 }
353
354 case PGMPOOLKIND_32BIT_PD:
355 {
356 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
357 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
358
359 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
360 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
361 X86PGUINT const uPde = uShw.pPD->a[iShw].u;
362 if (uPde & X86_PDE_P)
363 {
364 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
365 pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
366 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
367 }
368
369 /* paranoia / a bit assumptive. */
370 if ( (off & 3)
371 && (off & 3) + cbWrite > sizeof(X86PTE))
372 {
373 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
374 if ( iShw2 != iShw
375 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
376 {
377 X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
378 if (uPde2 & X86_PDE_P)
379 {
380 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
381 pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
382 ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
383 }
384 }
385 }
386#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
387 if ( uShw.pPD->a[iShw].n.u1Present
388 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
389 {
390 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
391 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
392 ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
393 }
394#endif
395 break;
396 }
397
398 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
399 {
400 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
401 const unsigned iShw = off / sizeof(X86PDEPAE);
402 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
403
404 /*
405 * Causes trouble when the guest uses a PDE to refer to the whole page table level
406 * structure. (Invalidate here; faults later on when it tries to change the page
407 * table entries -> recheck; probably only applies to the RC case.)
408 */
409 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
410 if (uPde & X86_PDE_P)
411 {
412 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
413 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
414 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
415 }
416
417 /* paranoia / a bit assumptive. */
418 if ( (off & 7)
419 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
420 {
421 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
422 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
423
424 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
425 if (uPde2 & X86_PDE_P)
426 {
427 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
428 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
429 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
430 }
431 }
432 break;
433 }
434
435 case PGMPOOLKIND_PAE_PDPT:
436 {
437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
438 /*
439 * Hopefully this doesn't happen very often:
440 * - touching unused parts of the page
441 * - messing with the bits of pd pointers without changing the physical address
442 */
443 /* PDPT roots are not page aligned; 32 byte only! */
444 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
445
446 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
447 const unsigned iShw = offPdpt / sizeof(X86PDPE);
448 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
449 {
450 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
451 if (uPdpe & X86_PDPE_P)
452 {
453 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
454 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
455 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
456 }
457
458 /* paranoia / a bit assumptive. */
459 if ( (offPdpt & 7)
460 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
461 {
462 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
463 if ( iShw2 != iShw
464 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
465 {
466 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
467 if (uPdpe2 & X86_PDPE_P)
468 {
469 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
470 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
471 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
472 }
473 }
474 }
475 }
476 break;
477 }
478
479 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
480 {
481 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
482 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
483 const unsigned iShw = off / sizeof(X86PDEPAE);
484 X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
485 if (uPde & X86_PDE_P)
486 {
487 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
488 pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
489 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
490 }
491
492 /* paranoia / a bit assumptive. */
493 if ( (off & 7)
494 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
495 {
496 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
497 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
498 X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
499 if (uPde2 & X86_PDE_P)
500 {
501 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
502 pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
503 ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
504 }
505 }
506 break;
507 }
508
509 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
510 {
511 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
512 /*
513 * Hopefully this doesn't happen very often:
514 * - messing with the bits of pd pointers without changing the physical address
515 */
516 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
517 const unsigned iShw = off / sizeof(X86PDPE);
518 X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
519 if (uPdpe & X86_PDPE_P)
520 {
521 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
522 pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
523 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
524 }
525 /* paranoia / a bit assumptive. */
526 if ( (off & 7)
527 && (off & 7) + cbWrite > sizeof(X86PDPE))
528 {
529 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
530 X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
531 if (uPdpe2 & X86_PDPE_P)
532 {
533 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
534 pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
535 ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
536 }
537 }
538 break;
539 }
540
541 case PGMPOOLKIND_64BIT_PML4:
542 {
543 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
544 /*
545 * Hopefully this doesn't happen very often:
546 * - messing with the bits of pd pointers without changing the physical address
547 */
548 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
549 const unsigned iShw = off / sizeof(X86PDPE);
550 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
551 if (uPml4e & X86_PML4E_P)
552 {
553 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
554 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
555 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
556 }
557 /* paranoia / a bit assumptive. */
558 if ( (off & 7)
559 && (off & 7) + cbWrite > sizeof(X86PDPE))
560 {
561 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
562 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
563 if (uPml4e2 & X86_PML4E_P)
564 {
565 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
566 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
567 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
568 }
569 }
570 break;
571 }
572
573#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
574 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
575 {
576 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
577 const unsigned iShw = off / sizeof(EPTPML4E);
578 X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
579 if (uPml4e & EPT_PRESENT_MASK)
580 {
581 Log7Func(("PML4 iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPml4e, pPage->GCPhys));
582 pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
583 ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
584 }
585
586 /* paranoia / a bit assumptive. */
587 if ( (off & 7)
588 && (off & 7) + cbWrite > sizeof(X86PML4E))
589 {
590 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
591 X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
592 if (uPml4e2 & EPT_PRESENT_MASK)
593 {
594 Log7Func(("PML4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
595 pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
596 ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
597 }
598 }
599 break;
600 }
601
602 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
603 {
604 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
605 const unsigned iShw = off / sizeof(EPTPDPTE);
606 X86PGPAEUINT const uPdpte = uShw.pEptPdpt->a[iShw].u;
607 if (uPdpte & EPT_PRESENT_MASK)
608 {
609 Log7Func(("EPT PDPT iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPdpte, pPage->GCPhys));
610 pgmPoolFree(pVM, uPdpte & EPT_PDPTE_PG_MASK, pPage->idx, iShw);
611 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw].u, 0);
612 }
613
614 /* paranoia / a bit assumptive. */
615 if ( (off & 7)
616 && (off & 7) + cbWrite > sizeof(EPTPDPTE))
617 {
618 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDPTE);
619 X86PGPAEUINT const uPdpte2 = uShw.pEptPdpt->a[iShw2].u;
620 if (uPdpte2 & EPT_PRESENT_MASK)
621 {
622 Log7Func(("EPT PDPT iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpte2));
623 pgmPoolFree(pVM, uPdpte2 & EPT_PDPTE_PG_MASK, pPage->idx, iShw2);
624 ASMAtomicWriteU64(&uShw.pEptPdpt->a[iShw2].u, 0);
625 }
626 }
627 break;
628 }
629
630 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
631 {
632 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
633 const unsigned iShw = off / sizeof(EPTPDE);
634 X86PGPAEUINT const uPde = uShw.pEptPd->a[iShw].u;
635 if (uPde & EPT_PRESENT_MASK)
636 {
637 Log7Func(("EPT PD iShw=%#x: %RX64 (%RGp) -> freeing it!\n", iShw, uPde, pPage->GCPhys));
638 pgmPoolFree(pVM, uPde & EPT_PDE_PG_MASK, pPage->idx, iShw);
639 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw].u, 0);
640 }
641
642 /* paranoia / a bit assumptive. */
643 if ( (off & 7)
644 && (off & 7) + cbWrite > sizeof(EPTPDE))
645 {
646 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPDE);
647 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPd->a));
648 X86PGPAEUINT const uPde2 = uShw.pEptPd->a[iShw2].u;
649 if (uPde2 & EPT_PRESENT_MASK)
650 {
651 Log7Func(("EPT PD (2): iShw2=%#x: %RX64 (%RGp) -> freeing it!\n", iShw2, uPde2, pPage->GCPhys));
652 pgmPoolFree(pVM, uPde2 & EPT_PDE_PG_MASK, pPage->idx, iShw2);
653 ASMAtomicWriteU64(&uShw.pEptPd->a[iShw2].u, 0);
654 }
655 }
656 break;
657 }
658
659 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
660 {
661 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
662 const unsigned iShw = off / sizeof(EPTPTE);
663 X86PGPAEUINT const uPte = uShw.pEptPt->a[iShw].u;
664 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
665 if (uPte & EPT_PRESENT_MASK)
666 {
667 EPTPTE GstPte;
668 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
669 AssertRC(rc);
670
671 Log7Func(("EPT PT: iShw=%#x %RX64 (%RGp)\n", iShw, uPte, pPage->GCPhys));
672 pgmPoolTracDerefGCPhysHint(pPool, pPage,
673 uShw.pEptPt->a[iShw].u & EPT_PTE_PG_MASK,
674 GstPte.u & EPT_PTE_PG_MASK,
675 iShw);
676 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw].u, 0);
677 }
678
679 /* paranoia / a bit assumptive. */
680 if ( (off & 7)
681 && (off & 7) + cbWrite > sizeof(EPTPTE))
682 {
683 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(EPTPTE);
684 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pEptPt->a));
685 X86PGPAEUINT const uPte2 = uShw.pEptPt->a[iShw2].u;
686 if (uPte2 & EPT_PRESENT_MASK)
687 {
688 EPTPTE GstPte;
689 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
690 pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
691 GCPhysFault + sizeof(GstPte), sizeof(GstPte));
692 AssertRC(rc);
693 Log7Func(("EPT PT (2): iShw=%#x %RX64 (%RGp)\n", iShw2, uPte2, pPage->GCPhys));
694 pgmPoolTracDerefGCPhysHint(pPool, pPage,
695 uShw.pEptPt->a[iShw2].u & EPT_PTE_PG_MASK,
696 GstPte.u & EPT_PTE_PG_MASK,
697 iShw2);
698 ASMAtomicWriteU64(&uShw.pEptPt->a[iShw2].u, 0);
699 }
700 }
701 break;
702 }
703#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
704
705 default:
706 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
707 }
708 PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);
709
710 /* next */
711 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
712 return;
713 pPage = &pPool->aPages[pPage->iMonitoredNext];
714 }
715}
716
717#ifndef IN_RING3
718
719/**
720 * Checks if a access could be a fork operation in progress.
721 *
722 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
723 *
724 * @returns true if it's likely that we're forking, otherwise false.
725 * @param pPool The pool.
726 * @param pDis The disassembled instruction.
727 * @param offFault The access offset.
728 */
729DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
730{
731 /*
732 * i386 linux is using btr to clear X86_PTE_RW.
733 * The functions involved are (2.6.16 source inspection):
734 * clear_bit
735 * ptep_set_wrprotect
736 * copy_one_pte
737 * copy_pte_range
738 * copy_pmd_range
739 * copy_pud_range
740 * copy_page_range
741 * dup_mmap
742 * dup_mm
743 * copy_mm
744 * copy_process
745 * do_fork
746 */
747 if ( pDis->pCurInstr->uOpcode == OP_BTR
748 && !(offFault & 4)
749 /** @todo Validate that the bit index is X86_PTE_RW. */
750 )
751 {
752 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
753 return true;
754 }
755 return false;
756}
757
758
759/**
760 * Determine whether the page is likely to have been reused.
761 *
762 * @returns true if we consider the page as being reused for a different purpose.
763 * @returns false if we consider it to still be a paging page.
764 * @param pVM The cross context VM structure.
765 * @param pVCpu The cross context virtual CPU structure.
766 * @param pCtx Pointer to the register context for the CPU.
767 * @param pDis The disassembly info for the faulting instruction.
768 * @param pvFault The fault address.
769 * @param pPage The pool page being accessed.
770 *
771 * @remark The REP prefix check is left to the caller because of STOSD/W.
772 */
773DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTX pCtx, PDISCPUSTATE pDis, RTGCPTR pvFault,
774 PPGMPOOLPAGE pPage)
775{
776 /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
777 such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
778 if (pPage->cLocked)
779 {
780 Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been resued, because it's locked!\n", pvFault, pPage));
781 return false;
782 }
783
784 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
785 if ( HMHasPendingIrq(pVM)
786 && pCtx->rsp - pvFault < 32)
787 {
788 /* Fault caused by stack writes while trying to inject an interrupt event. */
789 Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pCtx->rsp));
790 return true;
791 }
792
793 LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pCtx->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));
794
795 /* Non-supervisor mode write means it's used for something else. */
796 if (CPUMGetGuestCPL(pVCpu) == 3)
797 return true;
798
799 switch (pDis->pCurInstr->uOpcode)
800 {
801 /* call implies the actual push of the return address faulted */
802 case OP_CALL:
803 Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
804 return true;
805 case OP_PUSH:
806 Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
807 return true;
808 case OP_PUSHF:
809 Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
810 return true;
811 case OP_PUSHA:
812 Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
813 return true;
814 case OP_FXSAVE:
815 Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
816 return true;
817 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
818 Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
819 return true;
820 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
821 Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
822 return true;
823 case OP_MOVSWD:
824 case OP_STOSWD:
825 if ( pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
826 && pCtx->rcx >= 0x40
827 )
828 {
829 Assert(pDis->uCpuMode == DISCPUMODE_64BIT);
830
831 Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
832 return true;
833 }
834 break;
835
836 default:
837 /*
838 * Anything having ESP on the left side means stack writes.
839 */
840 if ( ( (pDis->Param1.fUse & DISUSE_REG_GEN32)
841 || (pDis->Param1.fUse & DISUSE_REG_GEN64))
842 && (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
843 {
844 Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
845 return true;
846 }
847 break;
848 }
849
850 /*
851 * Page table updates are very very unlikely to be crossing page boundraries,
852 * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
853 */
854 uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
855 if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
856 {
857 Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
858 return true;
859 }
860
861 /*
862 * Nobody does an unaligned 8 byte write to a page table, right.
863 */
864 if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
865 {
866 Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
867 return true;
868 }
869
870 return false;
871}
872
873
874/**
875 * Flushes the page being accessed.
876 *
877 * @returns VBox status code suitable for scheduling.
878 * @param pVM The cross context VM structure.
879 * @param pVCpu The cross context virtual CPU structure.
880 * @param pPool The pool.
881 * @param pPage The pool page (head).
882 * @param pDis The disassembly of the write instruction.
883 * @param pCtx Pointer to the register context for the CPU.
884 * @param GCPhysFault The fault address as guest physical address.
885 * @todo VBOXSTRICTRC
886 */
887static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
888 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
889{
890 NOREF(pVM); NOREF(GCPhysFault);
891
892 /*
893 * First, do the flushing.
894 */
895 pgmPoolMonitorChainFlush(pPool, pPage);
896
897 /*
898 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
899 * Must do this in raw mode (!); XP boot will fail otherwise.
900 */
901 int rc = VINF_SUCCESS;
902 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
903 if (rc2 == VINF_SUCCESS)
904 { /* do nothing */ }
905 else if (rc2 == VINF_EM_RESCHEDULE)
906 {
907 rc = VBOXSTRICTRC_VAL(rc2);
908# ifndef IN_RING3
909 VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
910# endif
911 }
912 else if (rc2 == VERR_EM_INTERPRETER)
913 {
914 rc = VINF_EM_RAW_EMULATE_INSTR;
915 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
916 }
917 else if (RT_FAILURE_NP(rc2))
918 rc = VBOXSTRICTRC_VAL(rc2);
919 else
920 AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */
921
922 LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
923 return rc;
924}
925
926
927/**
928 * Handles the STOSD write accesses.
929 *
930 * @returns VBox status code suitable for scheduling.
931 * @param pVM The cross context VM structure.
932 * @param pPool The pool.
933 * @param pPage The pool page (head).
934 * @param pDis The disassembly of the write instruction.
935 * @param pCtx Pointer to the register context for the CPU.
936 * @param GCPhysFault The fault address as guest physical address.
937 * @param pvFault The fault address.
938 */
939DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
940 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
941{
942 unsigned uIncrement = pDis->Param1.cb;
943 NOREF(pVM);
944
945 Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
946 Assert(pCtx->rcx <= 0x20);
947
948# ifdef VBOX_STRICT
949 if (pDis->uOpMode == DISCPUMODE_32BIT)
950 Assert(uIncrement == 4);
951 else
952 Assert(uIncrement == 8);
953# endif
954
955 Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));
956
957 /*
958 * Increment the modification counter and insert it into the list
959 * of modified pages the first time.
960 */
961 if (!pPage->cModifications++)
962 pgmPoolMonitorModifiedInsert(pPool, pPage);
963
964 /*
965 * Execute REP STOSD.
966 *
967 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
968 * write situation, meaning that it's safe to write here.
969 */
970 PVMCPUCC pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
971 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
972 while (pCtx->rcx)
973 {
974 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
975 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pCtx->rax, uIncrement);
976 pu32 += uIncrement;
977 GCPhysFault += uIncrement;
978 pCtx->rdi += uIncrement;
979 pCtx->rcx--;
980 }
981 pCtx->rip += pDis->cbInstr;
982
983 LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
984 return VINF_SUCCESS;
985}
986
987
988/**
989 * Handles the simple write accesses.
990 *
991 * @returns VBox status code suitable for scheduling.
992 * @param pVM The cross context VM structure.
993 * @param pVCpu The cross context virtual CPU structure.
994 * @param pPool The pool.
995 * @param pPage The pool page (head).
996 * @param pDis The disassembly of the write instruction.
997 * @param pCtx Pointer to the register context for the CPU.
998 * @param GCPhysFault The fault address as guest physical address.
999 * @param pfReused Reused state (in/out)
1000 */
1001DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1002 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, bool *pfReused)
1003{
1004 Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
1005 NOREF(pVM);
1006 NOREF(pfReused); /* initialized by caller */
1007
1008 /*
1009 * Increment the modification counter and insert it into the list
1010 * of modified pages the first time.
1011 */
1012 if (!pPage->cModifications++)
1013 pgmPoolMonitorModifiedInsert(pPool, pPage);
1014
1015 /*
1016 * Clear all the pages.
1017 */
1018 uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
1019 if (cbWrite <= 8)
1020 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
1021 else if (cbWrite <= 16)
1022 {
1023 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
1024 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
1025 }
1026 else
1027 {
1028 Assert(cbWrite <= 32);
1029 for (uint32_t off = 0; off < cbWrite; off += 8)
1030 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
1031 }
1032
1033 /*
1034 * Interpret the instruction.
1035 */
1036 VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pCtx->rip);
1037 if (RT_SUCCESS(rc))
1038 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
1039 else if (rc == VERR_EM_INTERPRETER)
1040 {
1041 LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
1042 pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode));
1043 rc = VINF_EM_RAW_EMULATE_INSTR;
1044 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
1045 }
1046
1047# if 0 /* experimental code */
1048 if (rc == VINF_SUCCESS)
1049 {
1050 switch (pPage->enmKind)
1051 {
1052 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1053 {
1054 X86PTEPAE GstPte;
1055 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1056 AssertRC(rc);
1057
1058 /* Check the new value written by the guest. If present and with a bogus physical address, then
1059 * it's fairly safe to assume the guest is reusing the PT.
1060 */
1061 if (GstPte.n.u1Present)
1062 {
1063 RTHCPHYS HCPhys = -1;
1064 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1065 if (rc != VINF_SUCCESS)
1066 {
1067 *pfReused = true;
1068 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1069 }
1070 }
1071 break;
1072 }
1073 }
1074 }
1075# endif
1076
1077 LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
1078 return VBOXSTRICTRC_VAL(rc);
1079}
1080
1081
1082/**
1083 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
1084 * \#PF access handler callback for page table pages.}
1085 *
 * Takes the PGM lock right after validating @a uUser and releases it on every
 * return path after that. Once the faulting instruction has been
 * disassembled, one of the following is done:
 * - A simple instruction (no REP/REPNE prefix) is handed to
 * pgmRZPoolAccessPfHandlerSimple and the page stays monitored.
 * - A short, aligned REP STOSD/STOSQ staying within the page is handed to
 * pgmRZPoolAccessPfHandlerSTOSD.
 * - With PGMPOOL_WITH_OPTIMIZED_DIRTY_PT in ring-0, a PAE page table that hit
 * the modification limit is added to the dirty page list and its write
 * protection is temporarily switched off.
 * - Otherwise the page chain is flushed by pgmRZPoolAccessPfHandlerFlush.
 *
 * VINF_SUCCESS is returned early, without disassembling anything, when the
 * pool page no longer covers @a GCPhysFault, when it is already marked dirty
 * (PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) or, for nested pool pages
 * (VBOX_WITH_NESTED_HWVIRT_VMX_EPT), after flushing the page chain.
 *
 * @returns VBox strict status code.
 * @param pVM The cross context VM structure.
 * @param pVCpu The cross context virtual CPU structure.
 * @param uErrorCode Not used by this handler.
 * @param pCtx Pointer to the register context for the CPU.
 * @param pvFault The fault address.
 * @param GCPhysFault The fault address as guest physical address.
 * @param uUser Index of the PGMPOOLPAGE (must be below pPool->cCurPages).
 *
1086 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
1087 */
1088DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTX pCtx,
1089 RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
1090{
1091 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
1092 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
 /* uUser indexes pPool->aPages; refuse anything beyond the current page count. */
1093 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1094 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1095 unsigned cMaxModifications;
1096 bool fForcedFlush = false;
1097 RT_NOREF_PV(uErrorCode);
1098
1099# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1100 AssertMsg(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT,
1101 ("pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1102# endif
1103 LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1104
1105 PGM_LOCK_VOID(pVM);
1106 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1107 {
1108 /* Pool page changed while we were waiting for the lock; ignore. */
1109 Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1110 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1111 PGM_UNLOCK(pVM);
1112 return VINF_SUCCESS;
1113 }
1114# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1115 if (pPage->fDirty)
1116 {
1117# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1118 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1119# endif
1120 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
1121 PGM_UNLOCK(pVM);
1122 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1123 }
1124# endif
1125
1126# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1127 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1128 {
1129 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1130 void *pvGst;
1131 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1132 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1133 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1134 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1135 }
1136# endif
1137
1138# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
 /* Nested pool pages: just flush the page chain and return VINF_SUCCESS,
 * the instruction is neither disassembled nor interpreted here. */
1139 if (PGMPOOL_PAGE_IS_NESTED(pPage))
1140 {
1141 Assert(!CPUMIsGuestInVmxNonRootMode(CPUMQueryGuestCtxPtr(pVCpu)));
1142 Log7Func(("Flushing pvFault=%RGv GCPhysFault=%RGp\n", pvFault, GCPhysFault));
1143 pgmPoolMonitorChainFlush(pPool, pPage);
1144 PGM_UNLOCK(pVM);
1145 return VINF_SUCCESS;
1146 }
1147# endif
1148
1149 /*
1150 * Disassemble the faulting instruction.
1151 */
1152 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1153 int rc = EMInterpretDisasCurrent(pVCpu, pDis, NULL);
1154 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1155 {
 /* Hand the status back to the caller; only the two not-present statuses are expected. */
1156 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1157 PGM_UNLOCK(pVM);
1158 return rc;
1159 }
1160
1161 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1162
1163 /*
1164 * We should ALWAYS have the list head as user parameter. This
1165 * is because we use that page to record the changes.
1166 */
1167 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1168
1169# ifdef IN_RING0
1170 /* Maximum nr of modifications depends on the page type. */
1171 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1172 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1173 cMaxModifications = 4;
1174 else
1175 cMaxModifications = 24;
1176# else
1177 cMaxModifications = 48;
1178# endif
1179
1180 /*
1181 * Incremental page table updates should weigh more than random ones.
1182 * (Only applies when started from offset 0)
 *
 * The pattern checked for: RIP within 0x40 bytes of the recorded one, a
 * fault address exactly one operand size past the recorded one, and no
 * other invocation of this handler on this VCPU in between.
1183 */
1184 pVCpu->pgm.s.cPoolAccessHandler++;
1185 if ( pPage->GCPtrLastAccessHandlerRip >= pCtx->rip - 0x40 /* observed loops in Windows 7 x64 */
1186 && pPage->GCPtrLastAccessHandlerRip < pCtx->rip + 0x40
1187 && pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
1188 && pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
1189 {
1190 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1191 Assert(pPage->cModifications < 32000);
 /* Double the count so that a sequentially rewritten page hits the limit sooner. */
1192 pPage->cModifications = pPage->cModifications * 2;
1193 pPage->GCPtrLastAccessHandlerFault = pvFault;
1194 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1195 if (pPage->cModifications >= cMaxModifications)
1196 {
1197 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
 /* Keeps the dirty page optimization further down from being applied. */
1198 fForcedFlush = true;
1199 }
1200 }
1201
1202 if (pPage->cModifications >= cMaxModifications)
1203 Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1204
1205 /*
1206 * Check if it's worth dealing with.
1207 */
1208 bool fReused = false;
 /* Set when the reuse and forking checks below passed but the instruction
 * had a REP prefix we did not emulate; lets the dirty page code skip
 * repeating those two checks. */
1209 bool fNotReusedNotForking = false;
1210 if ( ( pPage->cModifications < cMaxModifications /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1211 || pgmPoolIsPageLocked(pPage)
1212 )
1213 && !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage))
1214 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1215 {
1216 /*
1217 * Simple instructions, no REP prefix.
1218 */
1219 if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
1220 {
1221 rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault, &fReused);
1222 if (fReused)
1223 goto flushPage;
1224
1225 /* A mov instruction to change the first page table entry will be remembered so we can detect
1226 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1227 */
1228 if ( rc == VINF_SUCCESS
1229 && !pPage->cLocked /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
1230 && pDis->pCurInstr->uOpcode == OP_MOV
1231 && (pvFault & PAGE_OFFSET_MASK) == 0)
1232 {
1233 pPage->GCPtrLastAccessHandlerFault = pvFault;
1234 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1235 pPage->GCPtrLastAccessHandlerRip = pCtx->rip;
1236 /* Make sure we don't kick out a page too quickly. */
1237 if (pPage->cModifications > 8)
1238 pPage->cModifications = 2;
1239 }
1240 else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
1241 {
1242 /* ignore the 2nd write to this page table entry. */
1243 pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
1244 }
1245 else
1246 {
 /* Not part of a sequential update; forget the recorded fault address and RIP. */
1247 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
1248 pPage->GCPtrLastAccessHandlerRip = 0;
1249 }
1250
1251 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
1252 PGM_UNLOCK(pVM);
1253 return rc;
1254 }
1255
1256 /*
1257 * Windows is frequently doing small memset() operations (netio test 4k+).
1258 * We have to deal with these or we'll kill the cache and performance.
 *
 * Only forward stores (DF clear) using the default operand and address
 * size, with at most 0x20 naturally aligned elements that all stay within
 * the guest page and a store value of 0 or 0x80, are emulated.
1259 */
1260 if ( pDis->pCurInstr->uOpcode == OP_STOSWD
1261 && !pCtx->eflags.Bits.u1DF
1262 && pDis->uOpMode == pDis->uCpuMode
1263 && pDis->uAddrMode == pDis->uCpuMode)
1264 {
1265 bool fValidStosd = false;
1266
1267 if ( pDis->uCpuMode == DISCPUMODE_32BIT
1268 && pDis->fPrefix == DISPREFIX_REP
1269 && pCtx->ecx <= 0x20
1270 && pCtx->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1271 && !((uintptr_t)pvFault & 3)
1272 && (pCtx->eax == 0 || pCtx->eax == 0x80) /* the two values observed. */
1273 )
1274 {
1275 fValidStosd = true;
1276 pCtx->rcx &= 0xffffffff; /* paranoia */
1277 }
1278 else
1279 if ( pDis->uCpuMode == DISCPUMODE_64BIT
1280 && pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
1281 && pCtx->rcx <= 0x20
1282 && pCtx->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
1283 && !((uintptr_t)pvFault & 7)
1284 && (pCtx->rax == 0 || pCtx->rax == 0x80) /* the two values observed. */
1285 )
1286 {
1287 fValidStosd = true;
1288 }
1289
1290 if (fValidStosd)
1291 {
1292 rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pCtx, GCPhysFault, pvFault);
1293 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
1294 PGM_UNLOCK(pVM);
1295 return rc;
1296 }
1297 }
1298
1299 /* REP prefix, don't bother. */
1300 STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
1301 Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1302 pCtx->eax, pCtx->ecx, pCtx->edi, pCtx->esi, (RTGCPTR)pCtx->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
1303 fNotReusedNotForking = true;
1304 }
1305
1306# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1307 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1308 * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
1309 */
1310 if ( pPage->cModifications >= cMaxModifications
1311 && !fForcedFlush
1312 && (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1313 && ( fNotReusedNotForking
1314 || ( !pgmRZPoolMonitorIsReused(pVM, pVCpu, pCtx, pDis, pvFault, pPage)
1315 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1316 )
1317 )
1318 {
1319 Assert(!pgmPoolIsPageLocked(pPage));
1320 Assert(pPage->fDirty == false);
1321
1322 /* Flush any monitored duplicates as we will disable write protection. */
1323 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1324 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1325 {
1326 PPGMPOOLPAGE pPageHead = pPage;
1327
1328 /* Find the monitor head. */
1329 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1330 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1331
 /* Walk the chain from the head and flush every page except pPage itself;
 * the loop is left via the break once the last page has been visited. */
1332 while (pPageHead)
1333 {
 /* Fetch the next index before the current page is (possibly) flushed. */
1334 unsigned idxNext = pPageHead->iMonitoredNext;
1335
1336 if (pPageHead != pPage)
1337 {
1338 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1339 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1340 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1341 AssertRC(rc2);
1342 }
1343
1344 if (idxNext == NIL_PGMPOOL_IDX)
1345 break;
1346
1347 pPageHead = &pPool->aPages[idxNext];
1348 }
1349 }
1350
1351 /* The flushing above might fail for locked pages, so double check. */
1352 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1353 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1354 {
1355 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1356
1357 /* Temporarily allow write access to the page table again. */
1358 rc = PGMHandlerPhysicalPageTempOff(pVM,
1359 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
1360 pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1361 if (rc == VINF_SUCCESS)
1362 {
1363 rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
1364 AssertMsg(rc == VINF_SUCCESS
1365 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1366 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1367 || rc == VERR_PAGE_NOT_PRESENT,
1368 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1369# ifdef VBOX_STRICT
1370 pPage->GCPtrDirtyFault = pvFault;
1371# endif
1372
1373 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
1374 PGM_UNLOCK(pVM);
1375 return rc;
1376 }
 /* NOTE(review): on failure we fall through to the flush below although
 * pgmPoolAddDirtyPage has already been called for this page - confirm
 * that the flush takes care of that. */
1377 }
1378 }
1379# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */
1380
1381 STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
1382flushPage:
1383 /*
1384 * Not worth it, so flush it.
1385 *
1386 * If we considered it to be reused, don't go back to ring-3
1387 * to emulate failed instructions since we usually cannot
1388 * interpret then. This may be a bit risky, in which case
1389 * the reuse detection must be fixed.
1390 */
1391 rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pCtx, GCPhysFault);
1392 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1393 && fReused)
1394 {
1395 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1396 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1397 if (PGMShwGetPage(pVCpu, pCtx->rip, NULL, NULL) == VINF_SUCCESS)
1398 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1399 }
1400 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
1401 PGM_UNLOCK(pVM);
1402 return rc;
1403}
1404
1405#endif /* !IN_RING3 */
1406
1407/**
1408 * @callback_method_impl{FNPGMPHYSHANDLER,
1409 * Access handler for shadowed page table pages.}
1410 *
1411 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
1412 * @note The @a uUser argument is the index of the PGMPOOLPAGE.
1413 */
1414DECLCALLBACK(VBOXSTRICTRC)
1415pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
1416 PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
1417{
1418 PPGMPOOL const pPool = pVM->pgm.s.CTX_SUFF(pPool);
1419 STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
1420 AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
1421 PPGMPOOLPAGE const pPage = &pPool->aPages[uUser];
1422 LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
1423 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
1424
1425 NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);
1426
1427 PGM_LOCK_VOID(pVM);
1428
1429#ifdef VBOX_WITH_STATISTICS
1430 /*
1431 * Collect stats on the access.
1432 */
1433 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
1434 if (cbBuf <= 16 && cbBuf > 0)
1435 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
1436 else if (cbBuf >= 17 && cbBuf < 32)
1437 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
1438 else if (cbBuf >= 32 && cbBuf < 64)
1439 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
1440 else if (cbBuf >= 64)
1441 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);
1442
1443 uint8_t cbAlign;
1444 switch (pPage->enmKind)
1445 {
1446 default:
1447 cbAlign = 7;
1448 break;
1449 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1450 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1451 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1452 case PGMPOOLKIND_32BIT_PD:
1453 case PGMPOOLKIND_32BIT_PD_PHYS:
1454 cbAlign = 3;
1455 break;
1456 }
1457 AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
1458 if ((uint8_t)GCPhys & cbAlign)
1459 STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
1460#endif
1461
1462 /*
1463 * Make sure the pool page wasn't modified by a different CPU.
1464 */
1465 if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
1466 {
1467 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1468
1469 /* The max modification count before flushing depends on the context and page type. */
1470#ifdef IN_RING3
1471 uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
1472#else
1473 uint16_t cMaxModifications;
1474 if ( pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1475 || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1476 cMaxModifications = 4;
1477 else
1478 cMaxModifications = 24;
1479#endif
1480
1481 /*
1482 * We don't have to be very sophisticated about this since there are relativly few calls here.
1483 * However, we must try our best to detect any non-cpu accesses (disk / networking).
1484 */
1485 if ( ( pPage->cModifications < cMaxModifications
1486 || pgmPoolIsPageLocked(pPage) )
1487 && enmOrigin != PGMACCESSORIGIN_DEVICE
1488 && cbBuf <= 16)
1489 {
1490 /* Clear the shadow entry. */
1491 if (!pPage->cModifications++)
1492 pgmPoolMonitorModifiedInsert(pPool, pPage);
1493
1494 if (cbBuf <= 8)
1495 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
1496 else
1497 {
1498 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
1499 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
1500 }
1501 }
1502 else
1503 pgmPoolMonitorChainFlush(pPool, pPage);
1504
1505 STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1506 }
1507 else
1508 Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1509 PGM_UNLOCK(pVM);
1510 return VINF_PGM_HANDLER_DO_DEFAULT;
1511}
1512
1513
1514#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1515
1516# if defined(VBOX_STRICT) && !defined(IN_RING3)
1517
1518/**
1519 * Check references to guest physical memory in a PAE / PAE page table.
1520 *
1521 * @param pPool The pool.
1522 * @param pPage The page.
1523 * @param pShwPT The shadow page table (mapping of the page).
1524 * @param pGstPT The guest page table.
1525 */
1526static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
1527{
1528 unsigned cErrors = 0;
1529 int LastRc = -1; /* initialized to shut up gcc */
1530 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1531 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1532 PVMCC pVM = pPool->CTX_SUFF(pVM);
1533
1534# ifdef VBOX_STRICT
1535 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1536 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1537# endif
1538 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1539 {
1540 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1541 {
1542 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1543 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1544 if ( rc != VINF_SUCCESS
1545 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1546 {
1547 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1548 LastPTE = i;
1549 LastRc = rc;
1550 LastHCPhys = HCPhys;
1551 cErrors++;
1552
1553 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1554 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1555 AssertRC(rc);
1556
1557 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1558 {
1559 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1560
1561 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1562 {
1563 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1564
1565 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1566 {
1567 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1568 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1569 {
1570 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1571 }
1572 }
1573
1574 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1575 }
1576 }
1577 }
1578 }
1579 }
1580 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1581}
1582
1583
1584/**
1585 * Check references to guest physical memory in a PAE / 32-bit page table.
1586 *
1587 * @param pPool The pool.
1588 * @param pPage The page.
1589 * @param pShwPT The shadow page table (mapping of the page).
1590 * @param pGstPT The guest page table.
1591 */
1592static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
1593{
1594 unsigned cErrors = 0;
1595 int LastRc = -1; /* initialized to shut up gcc */
1596 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1597 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1598 PVMCC pVM = pPool->CTX_SUFF(pVM);
1599
1600# ifdef VBOX_STRICT
1601 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1602 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1603# endif
1604 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1605 {
1606 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1607 {
1608 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1609 int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1610 if ( rc != VINF_SUCCESS
1611 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
1612 {
1613 Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1614 LastPTE = i;
1615 LastRc = rc;
1616 LastHCPhys = HCPhys;
1617 cErrors++;
1618
1619 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1620 rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
1621 AssertRC(rc);
1622
1623 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1624 {
1625 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1626
1627 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
1628 {
1629 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);
1630
1631 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1632 {
1633 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
1634 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
1635 {
1636 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
1637 }
1638 }
1639
1640 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
1641 }
1642 }
1643 }
1644 }
1645 }
1646 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
1647}
1648
1649# endif /* VBOX_STRICT && !IN_RING3 */
1650
1651/**
1652 * Clear references to guest physical memory in a PAE / PAE page table.
1653 *
1654 * @returns nr of changed PTEs
1655 * @param pPool The pool.
1656 * @param pPage The page.
1657 * @param pShwPT The shadow page table (mapping of the page).
1658 * @param pGstPT The guest page table.
1659 * @param pOldGstPT The old cached guest page table.
1660 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1661 * @param pfFlush Flush reused page table (out)
1662 */
1663DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
1664 PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1665{
1666 unsigned cChanged = 0;
1667
1668# ifdef VBOX_STRICT
1669 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1670 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1671# endif
1672 *pfFlush = false;
1673
1674 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1675 {
1676 /* Check the new value written by the guest. If present and with a bogus physical address, then
1677 * it's fairly safe to assume the guest is reusing the PT.
1678 */
1679 if ( fAllowRemoval
1680 && (pGstPT->a[i].u & X86_PTE_P))
1681 {
1682 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1683 {
1684 *pfFlush = true;
1685 return ++cChanged;
1686 }
1687 }
1688 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1689 {
1690 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1691 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1692 {
1693# ifdef VBOX_STRICT
1694 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1695 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1696 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1697# endif
1698 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1699 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1700 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1701 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1702
1703 if ( uHostAttr == uGuestAttr
1704 && fHostRW <= fGuestRW)
1705 continue;
1706 }
1707 cChanged++;
1708 /* Something was changed, so flush it. */
1709 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1710 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1711 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1712 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1713 }
1714 }
1715 return cChanged;
1716}
1717
1718
1719/**
1720 * Clear references to guest physical memory in a PAE / 32-bit page table.
1721 *
1722 * @returns nr of changed PTEs
1723 * @param pPool The pool.
1724 * @param pPage The page.
1725 * @param pShwPT The shadow page table (mapping of the page).
1726 * @param pGstPT The guest page table.
1727 * @param pOldGstPT The old cached guest page table.
1728 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1729 * @param pfFlush Flush reused page table (out)
1730 */
1731DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1732 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1733{
1734 unsigned cChanged = 0;
1735
1736# ifdef VBOX_STRICT
1737 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1738 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1739# endif
1740 *pfFlush = false;
1741
1742 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1743 {
1744 /* Check the new value written by the guest. If present and with a bogus physical address, then
1745 * it's fairly safe to assume the guest is reusing the PT. */
1746 if (fAllowRemoval)
1747 {
1748 X86PGUINT const uPte = pGstPT->a[i].u;
1749 if ( (uPte & X86_PTE_P)
1750 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1751 {
1752 *pfFlush = true;
1753 return ++cChanged;
1754 }
1755 }
1756 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1757 {
1758 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1759 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1760 {
1761# ifdef VBOX_STRICT
1762 RTHCPHYS HCPhys = NIL_RTGCPHYS;
1763 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1764 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1765# endif
1766 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1767 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1768 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1769 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1770
1771 if ( uHostAttr == uGuestAttr
1772 && fHostRW <= fGuestRW)
1773 continue;
1774 }
1775 cChanged++;
1776 /* Something was changed, so flush it. */
1777 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%x\n",
1778 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1779 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1780 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1781 }
1782 }
1783 return cChanged;
1784}
1785
1786
1787/**
1788 * Flush a dirty page
1789 *
1790 * @param pVM The cross context VM structure.
1791 * @param pPool The pool.
1792 * @param idxSlot Dirty array slot index
1793 * @param fAllowRemoval Allow a reused page table to be removed
1794 */
1795static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1796{
1797 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1798
1799 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1800 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1801 if (idxPage == NIL_PGMPOOL_IDX)
1802 return;
1803
1804 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1805 Assert(pPage->idx == idxPage);
1806 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1807
1808 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1809 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1810
1811 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1812 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1813 Assert(rc == VINF_SUCCESS);
1814 pPage->fDirty = false;
1815
1816# ifdef VBOX_STRICT
1817 uint64_t fFlags = 0;
1818 RTHCPHYS HCPhys;
1819 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1820 AssertMsg( ( rc == VINF_SUCCESS
1821 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1822 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1823 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1824 || rc == VERR_PAGE_NOT_PRESENT,
1825 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1826# endif
1827
1828 /* Flush those PTEs that have changed. */
1829 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1830 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1831 void *pvGst;
1832 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1833 bool fFlush;
1834 unsigned cChanges;
1835
1836 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1837 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1838 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1839 else
1840 {
1841 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* temporary, remove later. */
1842 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1843 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1844 }
1845
1846 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1847 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1848 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1849 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1850
1851 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1852 Assert(pPage->cModifications);
1853 if (cChanges < 4)
1854 pPage->cModifications = 1; /* must use > 0 here */
1855 else
1856 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1857
1858 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1859 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1860 pPool->idxFreeDirtyPage = idxSlot;
1861
1862 pPool->cDirtyPages--;
1863 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1864 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1865 if (fFlush)
1866 {
1867 Assert(fAllowRemoval);
1868 Log(("Flush reused page table!\n"));
1869 pgmPoolFlushPage(pPool, pPage);
1870 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1871 }
1872 else
1873 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1874}
1875
1876
1877# ifndef IN_RING3
1878/**
1879 * Add a new dirty page
1880 *
1881 * @param pVM The cross context VM structure.
1882 * @param pPool The pool.
1883 * @param pPage The page.
1884 */
1885void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1886{
1887 PGM_LOCK_ASSERT_OWNER(pVM);
1888 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1889 Assert(!pPage->fDirty);
1890 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage));
1891
1892 unsigned idxFree = pPool->idxFreeDirtyPage;
1893 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1894 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1895
1896 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1897 {
1898 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1899 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1900 }
1901 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1902 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1903
1904 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1905
1906 /*
1907 * Make a copy of the guest page table as we require valid GCPhys addresses
1908 * when removing references to physical pages.
1909 * (The HCPhys linear lookup is *extremely* expensive!)
1910 */
1911 void *pvGst;
1912 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1913 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1914 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1915# ifdef VBOX_STRICT
1916 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1917 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1918 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1919 else
1920 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1921 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1922# endif
1923 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1924
1925 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1926 pPage->fDirty = true;
1927 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1928 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1929 pPool->cDirtyPages++;
1930
1931 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1932 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1933 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1934 {
1935 unsigned i;
1936 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1937 {
1938 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1939 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1940 {
1941 pPool->idxFreeDirtyPage = idxFree;
1942 break;
1943 }
1944 }
1945 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1946 }
1947
1948 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1949
1950 /*
1951 * Clear all references to this shadow table. See @bugref{7298}.
1952 */
1953 pgmPoolTrackClearPageUsers(pPool, pPage);
1954}
1955# endif /* !IN_RING3 */
1956
1957
1958/**
1959 * Check if the specified page is dirty (not write monitored)
1960 *
1961 * @return dirty or not
1962 * @param pVM The cross context VM structure.
1963 * @param GCPhys Guest physical address
1964 */
1965bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1966{
1967 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1968 PGM_LOCK_ASSERT_OWNER(pVM);
1969 if (!pPool->cDirtyPages)
1970 return false;
1971
1972 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1973
1974 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1975 {
1976 unsigned idxPage = pPool->aidxDirtyPages[i];
1977 if (idxPage != NIL_PGMPOOL_IDX)
1978 {
1979 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1980 if (pPage->GCPhys == GCPhys)
1981 return true;
1982 }
1983 }
1984 return false;
1985}
1986
1987
1988/**
1989 * Reset all dirty pages by reinstating page monitoring.
1990 *
1991 * @param pVM The cross context VM structure.
1992 */
1993void pgmPoolResetDirtyPages(PVMCC pVM)
1994{
1995 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1996 PGM_LOCK_ASSERT_OWNER(pVM);
1997 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1998
1999 if (!pPool->cDirtyPages)
2000 return;
2001
2002 Log(("pgmPoolResetDirtyPages\n"));
2003 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2004 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
2005
2006 pPool->idxFreeDirtyPage = 0;
2007 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2008 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2009 {
2010 unsigned i;
2011 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2012 {
2013 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2014 {
2015 pPool->idxFreeDirtyPage = i;
2016 break;
2017 }
2018 }
2019 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2020 }
2021
2022 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
2023 return;
2024}
2025
2026
2027/**
2028 * Invalidate the PT entry for the specified page
2029 *
2030 * @param pVM The cross context VM structure.
2031 * @param GCPtrPage Guest page to invalidate
2032 */
2033void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
2034{
2035 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2036 PGM_LOCK_ASSERT_OWNER(pVM);
2037 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2038
2039 if (!pPool->cDirtyPages)
2040 return;
2041
2042 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2043 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2044 {
2045 /** @todo What was intended here??? This looks incomplete... */
2046 }
2047}
2048
2049
2050/**
2051 * Reset all dirty pages by reinstating page monitoring.
2052 *
2053 * @param pVM The cross context VM structure.
2054 * @param GCPhysPT Physical address of the page table
2055 */
2056void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
2057{
2058 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2059 PGM_LOCK_ASSERT_OWNER(pVM);
2060 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
2061 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
2062
2063 if (!pPool->cDirtyPages)
2064 return;
2065
2066 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2067
2068 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2069 {
2070 unsigned idxPage = pPool->aidxDirtyPages[i];
2071 if (idxPage != NIL_PGMPOOL_IDX)
2072 {
2073 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
2074 if (pPage->GCPhys == GCPhysPT)
2075 {
2076 idxDirtyPage = i;
2077 break;
2078 }
2079 }
2080 }
2081
2082 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
2083 {
2084 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
2085 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
2086 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
2087 {
2088 unsigned i;
2089 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
2090 {
2091 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
2092 {
2093 pPool->idxFreeDirtyPage = i;
2094 break;
2095 }
2096 }
2097 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
2098 }
2099 }
2100}
2101
2102#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
2103
2104/**
2105 * Inserts a page into the GCPhys hash table.
2106 *
2107 * @param pPool The pool.
2108 * @param pPage The page.
2109 */
2110DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2111{
2112 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
2113 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2114 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2115 pPage->iNext = pPool->aiHash[iHash];
2116 pPool->aiHash[iHash] = pPage->idx;
2117}
2118
2119
2120/**
2121 * Removes a page from the GCPhys hash table.
2122 *
2123 * @param pPool The pool.
2124 * @param pPage The page.
2125 */
2126DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2127{
2128 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
2129 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
2130 if (pPool->aiHash[iHash] == pPage->idx)
2131 pPool->aiHash[iHash] = pPage->iNext;
2132 else
2133 {
2134 uint16_t iPrev = pPool->aiHash[iHash];
2135 for (;;)
2136 {
2137 const int16_t i = pPool->aPages[iPrev].iNext;
2138 if (i == pPage->idx)
2139 {
2140 pPool->aPages[iPrev].iNext = pPage->iNext;
2141 break;
2142 }
2143 if (i == NIL_PGMPOOL_IDX)
2144 {
2145 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
2146 break;
2147 }
2148 iPrev = i;
2149 }
2150 }
2151 pPage->iNext = NIL_PGMPOOL_IDX;
2152}
2153
2154
2155/**
2156 * Frees up one cache page.
2157 *
2158 * @returns VBox status code.
2159 * @retval VINF_SUCCESS on success.
2160 * @param pPool The pool.
2161 * @param iUser The user index.
2162 */
2163static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2164{
2165 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2166 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
2167 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2168
2169 /*
2170 * Select one page from the tail of the age list.
2171 */
2172 PPGMPOOLPAGE pPage;
2173 for (unsigned iLoop = 0; ; iLoop++)
2174 {
2175 uint16_t iToFree = pPool->iAgeTail;
2176 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2177 iToFree = pPool->aPages[iToFree].iAgePrev;
2178/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2179 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2180 {
2181 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2182 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2183 {
2184 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2185 continue;
2186 iToFree = i;
2187 break;
2188 }
2189 }
2190*/
2191 Assert(iToFree != iUser);
2192 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2193 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2194 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2195 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2196 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2197 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2198
2199 pPage = &pPool->aPages[iToFree];
2200
2201 /*
2202 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2203 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2204 */
2205 if ( !pgmPoolIsPageLocked(pPage)
2206 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2207 break;
2208 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2209 pgmPoolCacheUsed(pPool, pPage);
2210 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2211 }
2212
2213 /*
2214 * Found a usable page, flush it and return.
2215 */
2216 int rc = pgmPoolFlushPage(pPool, pPage);
2217 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2218 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2219 if (rc == VINF_SUCCESS)
2220 PGM_INVL_ALL_VCPU_TLBS(pVM);
2221 return rc;
2222}
2223
2224
2225/**
2226 * Checks if a kind mismatch is really a page being reused
2227 * or if it's just normal remappings.
2228 *
2229 * @returns true if reused and the cached page (enmKind1) should be flushed
2230 * @returns false if not reused.
2231 * @param enmKind1 The kind of the cached page.
2232 * @param enmKind2 The kind of the requested page.
2233 */
2234static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2235{
2236 switch (enmKind1)
2237 {
2238 /*
2239 * Never reuse them. There is no remapping in non-paging mode.
2240 */
2241 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2242 case PGMPOOLKIND_32BIT_PD_PHYS:
2243 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2244 case PGMPOOLKIND_PAE_PD_PHYS:
2245 case PGMPOOLKIND_PAE_PDPT_PHYS:
2246 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2247 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2248 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2249 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2250 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2251 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2252 return false;
2253
2254 /*
2255 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2256 */
2257 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2258 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2259 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2260 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2261 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2262 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2264 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2265 case PGMPOOLKIND_32BIT_PD:
2266 case PGMPOOLKIND_PAE_PDPT:
2267 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2268 switch (enmKind2)
2269 {
2270 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2271 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2272 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2273 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2274 case PGMPOOLKIND_64BIT_PML4:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2276 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2277 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2278 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2279 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2280 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2281 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2282 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2283 return true;
2284 default:
2285 return false;
2286 }
2287
2288 /*
2289 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2290 */
2291 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2292 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2293 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2294 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2295 case PGMPOOLKIND_64BIT_PML4:
2296 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2297 Assert(!PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2));
2298 switch (enmKind2)
2299 {
2300 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2301 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2302 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2303 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2304 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2305 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2306 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2307 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2308 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2309 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2310 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2311 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2312 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2313 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2314 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2315 return true;
2316 default:
2317 return false;
2318 }
2319
2320#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2321 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2322 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2323 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2324 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2325 return PGMPOOL_PAGE_IS_KIND_NESTED(enmKind2);
2326
2327 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2328 return false;
2329#endif
2330
2331 /*
2332 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2333 */
2334 case PGMPOOLKIND_ROOT_NESTED:
2335 return false;
2336
2337 default:
2338 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2339 }
2340}
2341
2342
2343/**
2344 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2345 *
2346 * @returns VBox status code.
2347 * @retval VINF_PGM_CACHED_PAGE on success.
2348 * @retval VERR_FILE_NOT_FOUND if not found.
2349 * @param pPool The pool.
2350 * @param GCPhys The GC physical address of the page we're gonna shadow.
2351 * @param enmKind The kind of mapping.
2352 * @param enmAccess Access type for the mapping (only relevant for big pages)
2353 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2354 * @param iUser The shadow page pool index of the user table. This is
2355 * NIL_PGMPOOL_IDX for root pages.
2356 * @param iUserTable The index into the user table (shadowed). Ignored if
2357 * root page
2358 * @param ppPage Where to store the pointer to the page.
2359 */
2360static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2361 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2362{
2363 /*
2364 * Look up the GCPhys in the hash.
2365 */
2366 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2367 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2368 if (i != NIL_PGMPOOL_IDX)
2369 {
2370 do
2371 {
2372 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2373 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2374 if (pPage->GCPhys == GCPhys)
2375 {
2376 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2377 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2378 && pPage->fA20Enabled == fA20Enabled)
2379 {
2380 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2381 * doesn't flush it in case there are no more free use records.
2382 */
2383 pgmPoolCacheUsed(pPool, pPage);
2384
2385 int rc = VINF_SUCCESS;
2386 if (iUser != NIL_PGMPOOL_IDX)
2387 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2388 if (RT_SUCCESS(rc))
2389 {
2390 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2391 *ppPage = pPage;
2392 if (pPage->cModifications)
2393 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2394 STAM_COUNTER_INC(&pPool->StatCacheHits);
2395 return VINF_PGM_CACHED_PAGE;
2396 }
2397 return rc;
2398 }
2399
2400 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2401 {
2402 /*
2403 * The kind is different. In some cases we should now flush the page
2404 * as it has been reused, but in most cases this is normal remapping
2405 * of PDs as PT or big pages using the GCPhys field in a slightly
2406 * different way than the other kinds.
2407 */
2408 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2409 {
2410 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2411 pgmPoolFlushPage(pPool, pPage);
2412 break;
2413 }
2414 }
2415 }
2416
2417 /* next */
2418 i = pPage->iNext;
2419 } while (i != NIL_PGMPOOL_IDX);
2420 }
2421
2422 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2423 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2424 return VERR_FILE_NOT_FOUND;
2425}
2426
2427
2428/**
2429 * Inserts a page into the cache.
2430 *
2431 * @param pPool The pool.
2432 * @param pPage The cached page.
2433 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2434 */
2435static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2436{
2437 /*
2438 * Insert into the GCPhys hash if the page is fit for that.
2439 */
2440 Assert(!pPage->fCached);
2441 if (fCanBeCached)
2442 {
2443 pPage->fCached = true;
2444 pgmPoolHashInsert(pPool, pPage);
2445 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2446 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2447 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2448 }
2449 else
2450 {
2451 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2452 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2453 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2454 }
2455
2456 /*
2457 * Insert at the head of the age list.
2458 */
2459 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2460 pPage->iAgeNext = pPool->iAgeHead;
2461 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2462 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2463 else
2464 pPool->iAgeTail = pPage->idx;
2465 pPool->iAgeHead = pPage->idx;
2466}
2467
2468
2469/**
2470 * Flushes a cached page.
2471 *
2472 * @param pPool The pool.
2473 * @param pPage The cached page.
2474 */
2475static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2476{
2477 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2478
2479 /*
2480 * Remove the page from the hash.
2481 */
2482 if (pPage->fCached)
2483 {
2484 pPage->fCached = false;
2485 pgmPoolHashRemove(pPool, pPage);
2486 }
2487 else
2488 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2489
2490 /*
2491 * Remove it from the age list.
2492 */
2493 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2494 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2495 else
2496 pPool->iAgeTail = pPage->iAgePrev;
2497 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2498 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2499 else
2500 pPool->iAgeHead = pPage->iAgeNext;
2501 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2502 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2503}
2504
2505
2506/**
2507 * Looks for pages sharing the monitor.
2508 *
2509 * @returns Pointer to the head page.
2510 * @returns NULL if not found.
2511 * @param pPool The Pool
2512 * @param pNewPage The page which is going to be monitored.
2513 */
2514static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2515{
2516 /*
2517 * Look up the GCPhys in the hash.
2518 */
2519 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2520 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2521 if (i == NIL_PGMPOOL_IDX)
2522 return NULL;
2523 do
2524 {
2525 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2526 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2527 && pPage != pNewPage)
2528 {
2529 switch (pPage->enmKind)
2530 {
2531 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2532 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2533 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2534 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2535 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2536 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2537 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2538 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2539 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2540 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2541 case PGMPOOLKIND_64BIT_PML4:
2542 case PGMPOOLKIND_32BIT_PD:
2543 case PGMPOOLKIND_PAE_PDPT:
2544#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2545 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2546 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2547 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2548#endif
2549 {
2550 /* find the head */
2551 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2552 {
2553 Assert(pPage->iMonitoredPrev != pPage->idx);
2554 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2555 }
2556 return pPage;
2557 }
2558
2559 /* ignore, no monitoring. */
2560 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2561 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2563 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2564 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2565 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2566 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2567 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2568 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2569 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2570 case PGMPOOLKIND_ROOT_NESTED:
2571 case PGMPOOLKIND_PAE_PD_PHYS:
2572 case PGMPOOLKIND_PAE_PDPT_PHYS:
2573 case PGMPOOLKIND_32BIT_PD_PHYS:
2574 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2575#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2576 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2577 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2578#endif
2579 break;
2580 default:
2581 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2582 }
2583 }
2584
2585 /* next */
2586 i = pPage->iNext;
2587 } while (i != NIL_PGMPOOL_IDX);
2588 return NULL;
2589}
2590
2591
2592/**
2593 * Enabled write monitoring of a guest page.
2594 *
2595 * @returns VBox status code.
2596 * @retval VINF_SUCCESS on success.
2597 * @param pPool The pool.
2598 * @param pPage The cached page.
2599 */
2600static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2601{
2602 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2603
2604 /*
2605 * Filter out the relevant kinds.
2606 */
2607 switch (pPage->enmKind)
2608 {
2609 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2610 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2611 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2612 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2613 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2614 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2615 case PGMPOOLKIND_64BIT_PML4:
2616 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2617 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2618 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2619 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2620 case PGMPOOLKIND_32BIT_PD:
2621 case PGMPOOLKIND_PAE_PDPT:
2622 break;
2623
2624 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2625 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2626 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2627 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2628 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2629 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2630 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2631 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2632 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2633 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2634 case PGMPOOLKIND_ROOT_NESTED:
2635 /* Nothing to monitor here. */
2636 return VINF_SUCCESS;
2637
2638 case PGMPOOLKIND_32BIT_PD_PHYS:
2639 case PGMPOOLKIND_PAE_PDPT_PHYS:
2640 case PGMPOOLKIND_PAE_PD_PHYS:
2641 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2642 /* Nothing to monitor here. */
2643 return VINF_SUCCESS;
2644
2645#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2646 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2647 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2648 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2649 break;
2650
2651 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2652 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2653 /* Nothing to monitor here. */
2654 return VINF_SUCCESS;
2655#endif
2656
2657 default:
2658 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2659 }
2660
2661 /*
2662 * Install handler.
2663 */
2664 int rc;
2665 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2666 if (pPageHead)
2667 {
2668 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2669 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2670
2671#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2672 if (pPageHead->fDirty)
2673 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2674#endif
2675
2676 pPage->iMonitoredPrev = pPageHead->idx;
2677 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2678 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2679 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2680 pPageHead->iMonitoredNext = pPage->idx;
2681 rc = VINF_SUCCESS;
2682 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2683 Log7Func(("Adding to monitoring list GCPhysPage=%RGp\n", pPage->GCPhys));
2684 }
2685 else
2686 {
2687 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2688 Log7Func(("Started monitoring GCPhysPage=%RGp HCPhys=%RHp enmKind=%s\n", pPage->GCPhys, pPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
2689
2690 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2691 PVMCC pVM = pPool->CTX_SUFF(pVM);
2692 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2693 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2694 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2695 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2696 * the heap size should suffice. */
2697 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2698 PVMCPU pVCpu = VMMGetCpu(pVM);
2699 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2700 }
2701 pPage->fMonitored = true;
2702 return rc;
2703}
2704
2705
2706/**
2707 * Disables write monitoring of a guest page.
2708 *
2709 * @returns VBox status code.
2710 * @retval VINF_SUCCESS on success.
2711 * @param pPool The pool.
2712 * @param pPage The cached page.
2713 */
2714static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2715{
2716 /*
2717 * Filter out the relevant kinds.
2718 */
2719 switch (pPage->enmKind)
2720 {
2721 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2722 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2723 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2724 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2725 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2726 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2727 case PGMPOOLKIND_64BIT_PML4:
2728 case PGMPOOLKIND_32BIT_PD:
2729 case PGMPOOLKIND_PAE_PDPT:
2730 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2731 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2732 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2733 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2734 break;
2735
2736 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2737 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2738 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2739 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2740 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2741 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2742 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2743 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2744 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2745 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2746 case PGMPOOLKIND_ROOT_NESTED:
2747 case PGMPOOLKIND_PAE_PD_PHYS:
2748 case PGMPOOLKIND_PAE_PDPT_PHYS:
2749 case PGMPOOLKIND_32BIT_PD_PHYS:
2750 /* Nothing to monitor here. */
2751 Assert(!pPage->fMonitored);
2752 return VINF_SUCCESS;
2753
2754#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
2755 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
2756 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
2757 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
2758 break;
2759
2760 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
2761 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
2762 /* Nothing to monitor here. */
2763 Assert(!pPage->fMonitored);
2764 return VINF_SUCCESS;
2765#endif
2766
2767 default:
2768 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2769 }
2770 Assert(pPage->fMonitored);
2771
2772 /*
2773 * Remove the page from the monitored list or uninstall it if last.
2774 */
2775 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2776 int rc;
2777 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2778 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2779 {
2780 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2781 {
2782 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2783 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2784 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2785
2786 AssertFatalRCSuccess(rc);
2787 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2788 }
2789 else
2790 {
2791 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2792 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2793 {
2794 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2795 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2796 }
2797 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2798 rc = VINF_SUCCESS;
2799 }
2800 }
2801 else
2802 {
2803 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2804 AssertFatalRC(rc);
2805 PVMCPU pVCpu = VMMGetCpu(pVM);
2806 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2807 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2808 }
2809 pPage->fMonitored = false;
2810
2811 /*
2812 * Remove it from the list of modified pages (if in it).
2813 */
2814 pgmPoolMonitorModifiedRemove(pPool, pPage);
2815
2816 if (PGMPOOL_PAGE_IS_NESTED(pPage))
2817 Log7Func(("Stopped monitoring %RGp\n", pPage->GCPhys));
2818
2819 return rc;
2820}
2821
2822
2823/**
2824 * Inserts the page into the list of modified pages.
2825 *
2826 * @param pPool The pool.
2827 * @param pPage The page.
2828 */
2829void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2830{
2831 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2832 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2833 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2834 && pPool->iModifiedHead != pPage->idx,
2835 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2836 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2837 pPool->iModifiedHead, pPool->cModifiedPages));
2838
2839 pPage->iModifiedNext = pPool->iModifiedHead;
2840 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2841 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2842 pPool->iModifiedHead = pPage->idx;
2843 pPool->cModifiedPages++;
2844#ifdef VBOX_WITH_STATISTICS
2845 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2846 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2847#endif
2848}
2849
2850
2851/**
2852 * Removes the page from the list of modified pages and resets the
2853 * modification counter.
2854 *
2855 * @param pPool The pool.
2856 * @param pPage The page which is believed to be in the list of modified pages.
2857 */
2858static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2859{
2860 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2861 if (pPool->iModifiedHead == pPage->idx)
2862 {
2863 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2864 pPool->iModifiedHead = pPage->iModifiedNext;
2865 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2866 {
2867 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2868 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2869 }
2870 pPool->cModifiedPages--;
2871 }
2872 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2873 {
2874 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2875 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2876 {
2877 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2878 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2879 }
2880 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2881 pPool->cModifiedPages--;
2882 }
2883 else
2884 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2885 pPage->cModifications = 0;
2886}
2887
2888
2889/**
2890 * Zaps the list of modified pages, resetting their modification counters in the process.
2891 *
2892 * @param pVM The cross context VM structure.
2893 */
2894static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2895{
2896 PGM_LOCK_VOID(pVM);
2897 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2898 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2899
2900 unsigned cPages = 0; NOREF(cPages);
2901
2902#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2903 pgmPoolResetDirtyPages(pVM);
2904#endif
2905
2906 uint16_t idx = pPool->iModifiedHead;
2907 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2908 while (idx != NIL_PGMPOOL_IDX)
2909 {
2910 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2911 idx = pPage->iModifiedNext;
2912 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2913 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2914 pPage->cModifications = 0;
2915 Assert(++cPages);
2916 }
2917 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2918 pPool->cModifiedPages = 0;
2919 PGM_UNLOCK(pVM);
2920}
2921
2922
2923/**
2924 * Handle SyncCR3 pool tasks
2925 *
2926 * @returns VBox status code.
2927 * @retval VINF_SUCCESS if successfully added.
2928 * @retval VINF_PGM_SYNC_CR3 is it needs to be deferred to ring 3 (GC only)
2929 * @param pVCpu The cross context virtual CPU structure.
2930 * @remark Should only be used when monitoring is available, thus placed in
2931 * the PGMPOOL_WITH_MONITORING \#ifdef.
2932 */
2933int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2934{
2935 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2936 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2937
2938 /*
2939 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2940 * Occasionally we will have to clear all the shadow page tables because we wanted
2941 * to monitor a page which was mapped by too many shadowed page tables. This operation
2942 * sometimes referred to as a 'lightweight flush'.
2943 */
2944# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2945 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2946 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2947# else /* !IN_RING3 */
2948 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2949 {
2950 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2951 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2952
2953 /* Make sure all other VCPUs return to ring 3. */
2954 if (pVM->cCpus > 1)
2955 {
2956 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2957 PGM_INVL_ALL_VCPU_TLBS(pVM);
2958 }
2959 return VINF_PGM_SYNC_CR3;
2960 }
2961# endif /* !IN_RING3 */
2962 else
2963 {
2964 pgmPoolMonitorModifiedClearAll(pVM);
2965
2966 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2967 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2968 {
2969 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2970 return pgmPoolSyncCR3(pVCpu);
2971 }
2972 }
2973 return VINF_SUCCESS;
2974}
2975
2976
2977/**
2978 * Frees up at least one user entry.
2979 *
2980 * @returns VBox status code.
2981 * @retval VINF_SUCCESS if successfully added.
2982 *
2983 * @param pPool The pool.
2984 * @param iUser The user index.
2985 */
2986static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2987{
2988 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2989 /*
2990 * Just free cached pages in a braindead fashion.
2991 */
2992 /** @todo walk the age list backwards and free the first with usage. */
2993 int rc = VINF_SUCCESS;
2994 do
2995 {
2996 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2997 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2998 rc = rc2;
2999 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
3000 return rc;
3001}
3002
3003
3004/**
3005 * Inserts a page into the cache.
3006 *
3007 * This will create user node for the page, insert it into the GCPhys
3008 * hash, and insert it into the age list.
3009 *
3010 * @returns VBox status code.
3011 * @retval VINF_SUCCESS if successfully added.
3012 *
3013 * @param pPool The pool.
3014 * @param pPage The cached page.
3015 * @param GCPhys The GC physical address of the page we're gonna shadow.
3016 * @param iUser The user index.
3017 * @param iUserTable The user table index.
3018 */
3019DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
3020{
3021 int rc = VINF_SUCCESS;
3022 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3023
3024 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
3025
3026 if (iUser != NIL_PGMPOOL_IDX)
3027 {
3028#ifdef VBOX_STRICT
3029 /*
3030 * Check that the entry doesn't already exists.
3031 */
3032 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3033 {
3034 uint16_t i = pPage->iUserHead;
3035 do
3036 {
3037 Assert(i < pPool->cMaxUsers);
3038 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3039 i = paUsers[i].iNext;
3040 } while (i != NIL_PGMPOOL_USER_INDEX);
3041 }
3042#endif
3043
3044 /*
3045 * Find free a user node.
3046 */
3047 uint16_t i = pPool->iUserFreeHead;
3048 if (i == NIL_PGMPOOL_USER_INDEX)
3049 {
3050 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3051 if (RT_FAILURE(rc))
3052 return rc;
3053 i = pPool->iUserFreeHead;
3054 }
3055
3056 /*
3057 * Unlink the user node from the free list,
3058 * initialize and insert it into the user list.
3059 */
3060 pPool->iUserFreeHead = paUsers[i].iNext;
3061 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
3062 paUsers[i].iUser = iUser;
3063 paUsers[i].iUserTable = iUserTable;
3064 pPage->iUserHead = i;
3065 }
3066 else
3067 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3068
3069
3070 /*
3071 * Insert into cache and enable monitoring of the guest page if enabled.
3072 *
3073 * Until we implement caching of all levels, including the CR3 one, we'll
3074 * have to make sure we don't try monitor & cache any recursive reuse of
3075 * a monitored CR3 page. Because all windows versions are doing this we'll
3076 * have to be able to do combined access monitoring, CR3 + PT and
3077 * PD + PT (guest PAE).
3078 *
3079 * Update:
3080 * We're now cooperating with the CR3 monitor if an uncachable page is found.
3081 */
3082 const bool fCanBeMonitored = true;
3083 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
3084 if (fCanBeMonitored)
3085 {
3086 rc = pgmPoolMonitorInsert(pPool, pPage);
3087 AssertRC(rc);
3088 }
3089 return rc;
3090}
3091
3092
3093/**
3094 * Adds a user reference to a page.
3095 *
3096 * This will move the page to the head of the
3097 *
3098 * @returns VBox status code.
3099 * @retval VINF_SUCCESS if successfully added.
3100 *
3101 * @param pPool The pool.
3102 * @param pPage The cached page.
3103 * @param iUser The user index.
3104 * @param iUserTable The user table.
3105 */
3106static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3107{
3108 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
3109 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3110 Assert(iUser != NIL_PGMPOOL_IDX);
3111
3112# ifdef VBOX_STRICT
3113 /*
3114 * Check that the entry doesn't already exists. We only allow multiple
3115 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
3116 */
3117 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
3118 {
3119 uint16_t i = pPage->iUserHead;
3120 do
3121 {
3122 Assert(i < pPool->cMaxUsers);
3123 /** @todo this assertion looks odd... Shouldn't it be && here? */
3124 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
3125 i = paUsers[i].iNext;
3126 } while (i != NIL_PGMPOOL_USER_INDEX);
3127 }
3128# endif
3129
3130 /*
3131 * Allocate a user node.
3132 */
3133 uint16_t i = pPool->iUserFreeHead;
3134 if (i == NIL_PGMPOOL_USER_INDEX)
3135 {
3136 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
3137 if (RT_FAILURE(rc))
3138 return rc;
3139 i = pPool->iUserFreeHead;
3140 }
3141 pPool->iUserFreeHead = paUsers[i].iNext;
3142
3143 /*
3144 * Initialize the user node and insert it.
3145 */
3146 paUsers[i].iNext = pPage->iUserHead;
3147 paUsers[i].iUser = iUser;
3148 paUsers[i].iUserTable = iUserTable;
3149 pPage->iUserHead = i;
3150
3151# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3152 if (pPage->fDirty)
3153 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
3154# endif
3155
3156 /*
3157 * Tell the cache to update its replacement stats for this page.
3158 */
3159 pgmPoolCacheUsed(pPool, pPage);
3160 return VINF_SUCCESS;
3161}
3162
3163
3164/**
3165 * Frees a user record associated with a page.
3166 *
3167 * This does not clear the entry in the user table, it simply replaces the
3168 * user record to the chain of free records.
3169 *
3170 * @param pPool The pool.
3171 * @param pPage The shadow page.
3172 * @param iUser The shadow page pool index of the user table.
3173 * @param iUserTable The index into the user table (shadowed).
3174 *
3175 * @remarks Don't call this for root pages.
3176 */
3177static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3178{
3179 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
3180 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3181 Assert(iUser != NIL_PGMPOOL_IDX);
3182
3183 /*
3184 * Unlink and free the specified user entry.
3185 */
3186
3187 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
3188 uint16_t i = pPage->iUserHead;
3189 if ( i != NIL_PGMPOOL_USER_INDEX
3190 && paUsers[i].iUser == iUser
3191 && paUsers[i].iUserTable == iUserTable)
3192 {
3193 pPage->iUserHead = paUsers[i].iNext;
3194
3195 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3196 paUsers[i].iNext = pPool->iUserFreeHead;
3197 pPool->iUserFreeHead = i;
3198 return;
3199 }
3200
3201 /* General: Linear search. */
3202 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
3203 while (i != NIL_PGMPOOL_USER_INDEX)
3204 {
3205 if ( paUsers[i].iUser == iUser
3206 && paUsers[i].iUserTable == iUserTable)
3207 {
3208 if (iPrev != NIL_PGMPOOL_USER_INDEX)
3209 paUsers[iPrev].iNext = paUsers[i].iNext;
3210 else
3211 pPage->iUserHead = paUsers[i].iNext;
3212
3213 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3214 paUsers[i].iNext = pPool->iUserFreeHead;
3215 pPool->iUserFreeHead = i;
3216 return;
3217 }
3218 iPrev = i;
3219 i = paUsers[i].iNext;
3220 }
3221
3222 /* Fatal: didn't find it */
3223 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3224 iUser, iUserTable, pPage->GCPhys));
3225}
3226
3227
#if 0 /* unused */
/**
 * Gets the entry size of a shadow table.
 *
 * @returns The size of an entry in bytes: 4 for the 32-bit table kinds, 8 for
 *          the PAE, 64-bit, EPT and nested-root kinds listed below.  Any kind
 *          not listed triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* 32-bit shadow tables. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
            return 4;

        /* PAE shadow tables. */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        /* 64-bit shadow tables. */
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PML4:
        /* EPT and nested root. */
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            return 8;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3278
#if 0 /* unused */
/**
 * Gets the entry size of a guest table.
 *
 * @returns The size of an entry in bytes: 4 for kinds shadowing 32-bit guest
 *          tables, 8 for kinds shadowing PAE / 64-bit guest tables.  For the
 *          kinds without a guest table (the PHYS and nested-root kinds) an
 *          assertion is raised and 0 is returned.  Any kind not listed
 *          triggers a fatal assertion.
 *
 * @param   enmKind     The kind of page.
 */
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        /* 32-bit guest tables. */
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            return 4;

        /* PAE and 64-bit guest tables. */
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PDPT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4:
            return 8;

        /* No guest table behind these kinds. */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PD_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PD_PHYS:
        case PGMPOOLKIND_PAE_PDPT_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /** @todo can we return 0? (nobody is calling this...) */
            AssertFailed();
            return 0;

        default:
            AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
    }
}
#endif /* unused */
3333
3334
3335/**
3336 * Checks one shadow page table entry for a mapping of a physical page.
3337 *
3338 * @returns true / false indicating removal of all relevant PTEs
3339 *
3340 * @param pVM The cross context VM structure.
3341 * @param pPhysPage The guest page in question.
3342 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3343 * @param iShw The shadow page table.
3344 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3345 */
3346static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3347{
3348 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3349 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3350 bool fRet = false;
3351
3352 /*
3353 * Assert sanity.
3354 */
3355 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3356 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3357 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3358
3359 /*
3360 * Then, clear the actual mappings to the page in the shadow PT.
3361 */
3362 switch (pPage->enmKind)
3363 {
3364 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3366 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3367 {
3368 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3369 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3370 uint32_t u32AndMask = 0;
3371 uint32_t u32OrMask = 0;
3372
3373 if (!fFlushPTEs)
3374 {
3375 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3376 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3377 {
3378 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3379 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3380 u32OrMask = X86_PTE_RW;
3381 u32AndMask = UINT32_MAX;
3382 fRet = true;
3383 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3384 break;
3385
3386 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3387 u32OrMask = 0;
3388 u32AndMask = ~X86_PTE_RW;
3389 fRet = true;
3390 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3391 break;
3392 default:
3393 /* We will end up here when called with an "ALL" access handler. */
3394 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3395 break;
3396 }
3397 }
3398 else
3399 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3400
3401 /* Update the counter if we're removing references. */
3402 if (!u32AndMask)
3403 {
3404 Assert(pPage->cPresent);
3405 Assert(pPool->cPresent);
3406 pPage->cPresent--;
3407 pPool->cPresent--;
3408 }
3409
3410 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3411 {
3412 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3413 X86PTE Pte;
3414 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3415 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3416 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3417
3418 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3419 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3420 return fRet;
3421 }
3422#ifdef LOG_ENABLED
3423 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3424 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3425 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3426 {
3427 Log(("i=%d cFound=%d\n", i, ++cFound));
3428 }
3429#endif
3430 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3431 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3432 break;
3433 }
3434
3435 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3436 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3437 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3438 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3439 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3440 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3441#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
3442 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
3443# ifdef PGM_WITH_LARGE_PAGES
3444 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
3445# endif
3446#endif
3447 {
3448 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3449 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3450 uint64_t u64OrMask = 0;
3451 uint64_t u64AndMask = 0;
3452
3453 if (!fFlushPTEs)
3454 {
3455 /* Note! Disregarding the PGMPHYSHANDLER_F_NOT_IN_HM bit here. Should be harmless. */
3456 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3457 {
3458 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3459 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3460 u64OrMask = X86_PTE_RW;
3461 u64AndMask = UINT64_MAX;
3462 fRet = true;
3463 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3464 break;
3465
3466 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3467 u64OrMask = 0;
3468 u64AndMask = ~(uint64_t)X86_PTE_RW;
3469 fRet = true;
3470 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3471 break;
3472
3473 default:
3474 /* We will end up here when called with an "ALL" access handler. */
3475 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3476 break;
3477 }
3478 }
3479 else
3480 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3481
3482 /* Update the counter if we're removing references. */
3483 if (!u64AndMask)
3484 {
3485 Assert(pPage->cPresent);
3486 Assert(pPool->cPresent);
3487 pPage->cPresent--;
3488 pPool->cPresent--;
3489 }
3490
3491 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3492 {
3493 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3494 X86PTEPAE Pte;
3495 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3496 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3497 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3498
3499 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3500 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3501 return fRet;
3502 }
3503#ifdef LOG_ENABLED
3504 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3505 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3506 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3507 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3508 Log(("i=%d cFound=%d\n", i, ++cFound));
3509#endif
3510 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3511 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3512 break;
3513 }
3514
3515#ifdef PGM_WITH_LARGE_PAGES
3516 /* Large page case only. */
3517 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3518 {
3519 Assert(pVM->pgm.s.fNestedPaging);
3520
3521 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3522 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3523
3524 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3525 {
3526 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3527 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3528 pPD->a[iPte].u = 0;
3529 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3530
3531 /* Update the counter as we're removing references. */
3532 Assert(pPage->cPresent);
3533 Assert(pPool->cPresent);
3534 pPage->cPresent--;
3535 pPool->cPresent--;
3536
3537 return fRet;
3538 }
3539# ifdef LOG_ENABLED
3540 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3541 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3542 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3543 Log(("i=%d cFound=%d\n", i, ++cFound));
3544# endif
3545 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3546 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3547 break;
3548 }
3549
3550 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3551 case PGMPOOLKIND_PAE_PD_PHYS:
3552 {
3553 Assert(pVM->pgm.s.fNestedPaging);
3554
3555 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3556 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3557
3558 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3559 {
3560 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3561 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3562 pPD->a[iPte].u = 0;
3563 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3564
3565 /* Update the counter as we're removing references. */
3566 Assert(pPage->cPresent);
3567 Assert(pPool->cPresent);
3568 pPage->cPresent--;
3569 pPool->cPresent--;
3570 return fRet;
3571 }
3572# ifdef LOG_ENABLED
3573 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3574 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3575 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3576 Log(("i=%d cFound=%d\n", i, ++cFound));
3577# endif
3578 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3579 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3580 break;
3581 }
3582#endif /* PGM_WITH_LARGE_PAGES */
3583
3584 default:
3585 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3586 }
3587
3588 /* not reached. */
3589#ifndef _MSC_VER
3590 return fRet;
3591#endif
3592}
3593
3594
3595/**
3596 * Scans one shadow page table for mappings of a physical page.
3597 *
3598 * @param pVM The cross context VM structure.
3599 * @param pPhysPage The guest page in question.
3600 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3601 * @param iShw The shadow page table.
3602 */
3603static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3604{
3605 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3606
3607 /* We should only come here with when there's only one reference to this physical page. */
3608 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3609
3610 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3611 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3612 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3613 if (!fKeptPTEs)
3614 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3615 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3616}
3617
3618
3619/**
3620 * Flushes a list of shadow page tables mapping the same physical page.
3621 *
3622 * @param pVM The cross context VM structure.
3623 * @param pPhysPage The guest page in question.
3624 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3625 * @param iPhysExt The physical cross reference extent list to flush.
3626 */
3627static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3628{
3629 PGM_LOCK_ASSERT_OWNER(pVM);
3630 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3631 bool fKeepList = false;
3632
3633 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3634 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3635
3636 const uint16_t iPhysExtStart = iPhysExt;
3637 PPGMPOOLPHYSEXT pPhysExt;
3638 do
3639 {
3640 Assert(iPhysExt < pPool->cMaxPhysExts);
3641 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3642 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3643 {
3644 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3645 {
3646 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3647 if (!fKeptPTEs)
3648 {
3649 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3650 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3651 }
3652 else
3653 fKeepList = true;
3654 }
3655 }
3656 /* next */
3657 iPhysExt = pPhysExt->iNext;
3658 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3659
3660 if (!fKeepList)
3661 {
3662 /* insert the list into the free list and clear the ram range entry. */
3663 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3664 pPool->iPhysExtFreeHead = iPhysExtStart;
3665 /* Invalidate the tracking data. */
3666 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3667 }
3668
3669 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3670}
3671
3672
3673/**
3674 * Flushes all shadow page table mappings of the given guest page.
3675 *
3676 * This is typically called when the host page backing the guest one has been
3677 * replaced or when the page protection was changed due to a guest access
3678 * caught by the monitoring.
3679 *
3680 * @returns VBox status code.
3681 * @retval VINF_SUCCESS if all references has been successfully cleared.
3682 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3683 * pool cleaning. FF and sync flags are set.
3684 *
3685 * @param pVM The cross context VM structure.
3686 * @param GCPhysPage GC physical address of the page in question
3687 * @param pPhysPage The guest page in question.
3688 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3689 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3690 * flushed, it is NOT touched if this isn't necessary.
3691 * The caller MUST initialized this to @a false.
3692 */
3693int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3694{
3695 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3696 PGM_LOCK_VOID(pVM);
3697 int rc = VINF_SUCCESS;
3698
3699#ifdef PGM_WITH_LARGE_PAGES
3700 /* Is this page part of a large page? */
3701 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3702 {
3703 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3704 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3705
3706 /* Fetch the large page base. */
3707 PPGMPAGE pLargePage;
3708 if (GCPhysBase != GCPhysPage)
3709 {
3710 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3711 AssertFatal(pLargePage);
3712 }
3713 else
3714 pLargePage = pPhysPage;
3715
3716 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3717
3718 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3719 {
3720 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3721 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3722 pVM->pgm.s.cLargePagesDisabled++;
3723
3724 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3725 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3726
3727 *pfFlushTLBs = true;
3728 PGM_UNLOCK(pVM);
3729 return rc;
3730 }
3731 }
3732#else
3733 NOREF(GCPhysPage);
3734#endif /* PGM_WITH_LARGE_PAGES */
3735
3736 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3737 if (u16)
3738 {
3739 /*
3740 * The zero page is currently screwing up the tracking and we'll
3741 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3742 * is defined, zero pages won't normally be mapped. Some kind of solution
3743 * will be needed for this problem of course, but it will have to wait...
3744 */
3745 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3746 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3747 rc = VINF_PGM_GCPHYS_ALIASED;
3748 else
3749 {
3750 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3751 {
3752 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3753 pgmPoolTrackFlushGCPhysPT(pVM,
3754 pPhysPage,
3755 fFlushPTEs,
3756 PGMPOOL_TD_GET_IDX(u16));
3757 }
3758 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3759 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3760 else
3761 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3762 *pfFlushTLBs = true;
3763 }
3764 }
3765
3766 if (rc == VINF_PGM_GCPHYS_ALIASED)
3767 {
3768 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3769 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3770 rc = VINF_PGM_SYNC_CR3;
3771 }
3772 PGM_UNLOCK(pVM);
3773 return rc;
3774}
3775
3776
3777/**
3778 * Scans all shadow page tables for mappings of a physical page.
3779 *
3780 * This may be slow, but it's most likely more efficient than cleaning
3781 * out the entire page pool / cache.
3782 *
3783 * @returns VBox status code.
3784 * @retval VINF_SUCCESS if all references has been successfully cleared.
3785 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3786 * a page pool cleaning.
3787 *
3788 * @param pVM The cross context VM structure.
3789 * @param pPhysPage The guest page in question.
3790 */
3791int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3792{
3793 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3794 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3795 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3796 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3797
3798 /*
3799 * There is a limit to what makes sense.
3800 */
3801 if ( pPool->cPresent > 1024
3802 && pVM->cCpus == 1)
3803 {
3804 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3805 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3806 return VINF_PGM_GCPHYS_ALIASED;
3807 }
3808
3809 /*
3810 * Iterate all the pages until we've encountered all that in use.
3811 * This is simple but not quite optimal solution.
3812 */
3813 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3814 unsigned cLeft = pPool->cUsedPages;
3815 unsigned iPage = pPool->cCurPages;
3816 while (--iPage >= PGMPOOL_IDX_FIRST)
3817 {
3818 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3819 if ( pPage->GCPhys != NIL_RTGCPHYS
3820 && pPage->cPresent)
3821 {
3822 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* see if it hits */
3823 switch (pPage->enmKind)
3824 {
3825 /*
3826 * We only care about shadow page tables.
3827 */
3828 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3829 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3830 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3831 {
3832 const uint32_t u32 = (uint32_t)u64;
3833 unsigned cPresent = pPage->cPresent;
3834 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3835 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3836 {
3837 const X86PGUINT uPte = pPT->a[i].u;
3838 if (uPte & X86_PTE_P)
3839 {
3840 if ((uPte & X86_PTE_PG_MASK) == u32)
3841 {
3842 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3843 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3844
3845 /* Update the counter as we're removing references. */
3846 Assert(pPage->cPresent);
3847 Assert(pPool->cPresent);
3848 pPage->cPresent--;
3849 pPool->cPresent--;
3850 }
3851 if (!--cPresent)
3852 break;
3853 }
3854 }
3855 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3856 break;
3857 }
3858
3859 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3860 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3861 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3862 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3863 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3864 {
3865 unsigned cPresent = pPage->cPresent;
3866 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3867 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3868 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3869 {
3870 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3871 {
3872 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3873 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3874
3875 /* Update the counter as we're removing references. */
3876 Assert(pPage->cPresent);
3877 Assert(pPool->cPresent);
3878 pPage->cPresent--;
3879 pPool->cPresent--;
3880 }
3881 if (!--cPresent)
3882 break;
3883 }
3884 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3885 break;
3886 }
3887
3888 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3889 {
3890 unsigned cPresent = pPage->cPresent;
3891 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3892 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3893 {
3894 X86PGPAEUINT const uPte = pPT->a[i].u;
3895 if (uPte & EPT_E_READ)
3896 {
3897 if ((uPte & EPT_PTE_PG_MASK) == u64)
3898 {
3899 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3900 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3901
3902 /* Update the counter as we're removing references. */
3903 Assert(pPage->cPresent);
3904 Assert(pPool->cPresent);
3905 pPage->cPresent--;
3906 pPool->cPresent--;
3907 }
3908 if (!--cPresent)
3909 break;
3910 }
3911 }
3912 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3913 break;
3914 }
3915 }
3916
3917 if (!--cLeft)
3918 break;
3919 }
3920 }
3921
3922 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3923 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3924
3925 /*
3926 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3927 */
3928 if (pPool->cPresent > 1024)
3929 {
3930 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3931 return VINF_PGM_GCPHYS_ALIASED;
3932 }
3933
3934 return VINF_SUCCESS;
3935}
3936
3937
3938/**
3939 * Clears the user entry in a user table.
3940 *
3941 * This is used to remove all references to a page when flushing it.
3942 */
3943static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3944{
3945 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3946 Assert(pUser->iUser < pPool->cCurPages);
3947 uint32_t iUserTable = pUser->iUserTable;
3948
3949 /*
3950 * Map the user page. Ignore references made by fictitious pages.
3951 */
3952 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3953 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3954 union
3955 {
3956 uint64_t *pau64;
3957 uint32_t *pau32;
3958 } u;
3959 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3960 {
3961 Assert(!pUserPage->pvPageR3);
3962 return;
3963 }
3964 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3965
3966
3967 /* Safety precaution in case we change the paging for other modes too in the future. */
3968 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3969
3970#ifdef VBOX_STRICT
3971 /*
3972 * Some sanity checks.
3973 */
3974 switch (pUserPage->enmKind)
3975 {
3976 case PGMPOOLKIND_32BIT_PD:
3977 case PGMPOOLKIND_32BIT_PD_PHYS:
3978 Assert(iUserTable < X86_PG_ENTRIES);
3979 break;
3980 case PGMPOOLKIND_PAE_PDPT:
3981 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3982 case PGMPOOLKIND_PAE_PDPT_PHYS:
3983 Assert(iUserTable < 4);
3984 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3985 break;
3986 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3987 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3988 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3989 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3990 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3991 case PGMPOOLKIND_PAE_PD_PHYS:
3992 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3993 break;
3994 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3995 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3996 break;
3997 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3998 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3999 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4000 break;
4001 case PGMPOOLKIND_64BIT_PML4:
4002 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
4003 /* GCPhys >> PAGE_SHIFT is the index here */
4004 break;
4005 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4006 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4007 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4008 break;
4009
4010 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4011 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4012 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4013 break;
4014
4015 case PGMPOOLKIND_ROOT_NESTED:
4016 Assert(iUserTable < X86_PG_PAE_ENTRIES);
4017 break;
4018
4019# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4020 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4021 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4022 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4023 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4024 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4025 Assert(iUserTable < EPT_PG_ENTRIES);
4026 break;
4027# endif
4028
4029 default:
4030 AssertMsgFailed(("enmKind=%d GCPhys=%RGp\n", pUserPage->enmKind, pPage->GCPhys));
4031 break;
4032 }
4033#endif /* VBOX_STRICT */
4034
4035 /*
4036 * Clear the entry in the user page.
4037 */
4038 switch (pUserPage->enmKind)
4039 {
4040 /* 32-bit entries */
4041 case PGMPOOLKIND_32BIT_PD:
4042 case PGMPOOLKIND_32BIT_PD_PHYS:
4043 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
4044 break;
4045
4046 /* 64-bit entries */
4047 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4048 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4049 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4050 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4051 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4052 case PGMPOOLKIND_PAE_PD_PHYS:
4053 case PGMPOOLKIND_PAE_PDPT_PHYS:
4054 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4055 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4056 case PGMPOOLKIND_64BIT_PML4:
4057 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4058 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4059 case PGMPOOLKIND_PAE_PDPT:
4060 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4061 case PGMPOOLKIND_ROOT_NESTED:
4062 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4063 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4064# ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4065 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
4066 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
4067 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
4068 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
4069 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
4070#endif
4071 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
4072 break;
4073
4074 default:
4075 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
4076 }
4077 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
4078}
4079
4080
4081/**
4082 * Clears all users of a page.
4083 */
4084static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4085{
4086 /*
4087 * Free all the user records.
4088 */
4089 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
4090
4091 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4092 uint16_t i = pPage->iUserHead;
4093 while (i != NIL_PGMPOOL_USER_INDEX)
4094 {
4095 /* Clear enter in user table. */
4096 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
4097
4098 /* Free it. */
4099 const uint16_t iNext = paUsers[i].iNext;
4100 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4101 paUsers[i].iNext = pPool->iUserFreeHead;
4102 pPool->iUserFreeHead = i;
4103
4104 /* Next. */
4105 i = iNext;
4106 }
4107 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4108}
4109
4110
4111/**
4112 * Allocates a new physical cross reference extent.
4113 *
4114 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
4115 * @param pVM The cross context VM structure.
4116 * @param piPhysExt Where to store the phys ext index.
4117 */
4118PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
4119{
4120 PGM_LOCK_ASSERT_OWNER(pVM);
4121 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4122 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
4123 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4124 {
4125 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
4126 return NULL;
4127 }
4128 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4129 pPool->iPhysExtFreeHead = pPhysExt->iNext;
4130 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4131 *piPhysExt = iPhysExt;
4132 return pPhysExt;
4133}
4134
4135
4136/**
4137 * Frees a physical cross reference extent.
4138 *
4139 * @param pVM The cross context VM structure.
4140 * @param iPhysExt The extent to free.
4141 */
4142void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
4143{
4144 PGM_LOCK_ASSERT_OWNER(pVM);
4145 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4146 Assert(iPhysExt < pPool->cMaxPhysExts);
4147 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4148 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4149 {
4150 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4151 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4152 }
4153 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4154 pPool->iPhysExtFreeHead = iPhysExt;
4155}
4156
4157
4158/**
4159 * Frees a physical cross reference extent.
4160 *
4161 * @param pVM The cross context VM structure.
4162 * @param iPhysExt The extent to free.
4163 */
4164void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
4165{
4166 PGM_LOCK_ASSERT_OWNER(pVM);
4167 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4168
4169 const uint16_t iPhysExtStart = iPhysExt;
4170 PPGMPOOLPHYSEXT pPhysExt;
4171 do
4172 {
4173 Assert(iPhysExt < pPool->cMaxPhysExts);
4174 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
4175 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
4176 {
4177 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
4178 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4179 }
4180
4181 /* next */
4182 iPhysExt = pPhysExt->iNext;
4183 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4184
4185 pPhysExt->iNext = pPool->iPhysExtFreeHead;
4186 pPool->iPhysExtFreeHead = iPhysExtStart;
4187}
4188
4189
4190/**
4191 * Insert a reference into a list of physical cross reference extents.
4192 *
4193 * @returns The new tracking data for PGMPAGE.
4194 *
4195 * @param pVM The cross context VM structure.
4196 * @param iPhysExt The physical extent index of the list head.
4197 * @param iShwPT The shadow page table index.
4198 * @param iPte Page table entry
4199 *
4200 */
4201static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
4202{
4203 PGM_LOCK_ASSERT_OWNER(pVM);
4204 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4205 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4206
4207 /*
4208 * Special common cases.
4209 */
4210 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
4211 {
4212 paPhysExts[iPhysExt].aidx[1] = iShwPT;
4213 paPhysExts[iPhysExt].apte[1] = iPte;
4214 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4215 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
4216 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4217 }
4218 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
4219 {
4220 paPhysExts[iPhysExt].aidx[2] = iShwPT;
4221 paPhysExts[iPhysExt].apte[2] = iPte;
4222 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4223 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
4224 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4225 }
4226 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
4227
4228 /*
4229 * General treatment.
4230 */
4231 const uint16_t iPhysExtStart = iPhysExt;
4232 unsigned cMax = 15;
4233 for (;;)
4234 {
4235 Assert(iPhysExt < pPool->cMaxPhysExts);
4236 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4237 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
4238 {
4239 paPhysExts[iPhysExt].aidx[i] = iShwPT;
4240 paPhysExts[iPhysExt].apte[i] = iPte;
4241 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
4242 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
4243 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4244 }
4245 if (!--cMax)
4246 {
4247 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4248 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4249 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4250 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4251 }
4252
4253 /* advance */
4254 iPhysExt = paPhysExts[iPhysExt].iNext;
4255 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4256 break;
4257 }
4258
4259 /*
4260 * Add another extent to the list.
4261 */
4262 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4263 if (!pNew)
4264 {
4265 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4266 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4267 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4268 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4269 }
4270 pNew->iNext = iPhysExtStart;
4271 pNew->aidx[0] = iShwPT;
4272 pNew->apte[0] = iPte;
4273 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4274 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4275}
4276
4277
4278/**
4279 * Add a reference to guest physical page where extents are in use.
4280 *
4281 * @returns The new tracking data for PGMPAGE.
4282 *
4283 * @param pVM The cross context VM structure.
4284 * @param pPhysPage Pointer to the aPages entry in the ram range.
4285 * @param u16 The ram range flags (top 16-bits).
4286 * @param iShwPT The shadow page table index.
4287 * @param iPte Page table entry
4288 */
4289uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4290{
4291 PGM_LOCK_VOID(pVM);
4292 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4293 {
4294 /*
4295 * Convert to extent list.
4296 */
4297 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4298 uint16_t iPhysExt;
4299 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4300 if (pPhysExt)
4301 {
4302 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4303 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4304 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4305 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4306 pPhysExt->aidx[1] = iShwPT;
4307 pPhysExt->apte[1] = iPte;
4308 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4309 }
4310 else
4311 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4312 }
4313 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4314 {
4315 /*
4316 * Insert into the extent list.
4317 */
4318 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4319 }
4320 else
4321 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4322 PGM_UNLOCK(pVM);
4323 return u16;
4324}
4325
4326
4327/**
4328 * Clear references to guest physical memory.
4329 *
4330 * @param pPool The pool.
4331 * @param pPage The page.
4332 * @param pPhysPage Pointer to the aPages entry in the ram range.
4333 * @param iPte Shadow PTE index
4334 */
4335void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4336{
4337 PVMCC pVM = pPool->CTX_SUFF(pVM);
4338 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4339 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4340
4341 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4342 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4343 {
4344 PGM_LOCK_VOID(pVM);
4345
4346 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4347 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4348 do
4349 {
4350 Assert(iPhysExt < pPool->cMaxPhysExts);
4351
4352 /*
4353 * Look for the shadow page and check if it's all freed.
4354 */
4355 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4356 {
4357 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4358 && paPhysExts[iPhysExt].apte[i] == iPte)
4359 {
4360 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4361 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4362
4363 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4364 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4365 {
4366 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4367 PGM_UNLOCK(pVM);
4368 return;
4369 }
4370
4371 /* we can free the node. */
4372 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4373 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4374 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4375 {
4376 /* lonely node */
4377 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4378 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4379 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4380 }
4381 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4382 {
4383 /* head */
4384 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4385 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4386 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4387 }
4388 else
4389 {
4390 /* in list */
4391 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4392 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4393 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4394 }
4395 iPhysExt = iPhysExtNext;
4396 PGM_UNLOCK(pVM);
4397 return;
4398 }
4399 }
4400
4401 /* next */
4402 iPhysExtPrev = iPhysExt;
4403 iPhysExt = paPhysExts[iPhysExt].iNext;
4404 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4405
4406 PGM_UNLOCK(pVM);
4407 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4408 }
4409 else /* nothing to do */
4410 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4411}
4412
4413/**
4414 * Clear references to guest physical memory.
4415 *
4416 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4417 * physical address is assumed to be correct, so the linear search can be
4418 * skipped and we can assert at an earlier point.
4419 *
4420 * @param pPool The pool.
4421 * @param pPage The page.
4422 * @param HCPhys The host physical address corresponding to the guest page.
4423 * @param GCPhys The guest physical address corresponding to HCPhys.
4424 * @param iPte Shadow PTE index
4425 */
4426static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4427{
4428 /*
4429 * Lookup the page and check if it checks out before derefing it.
4430 */
4431 PVMCC pVM = pPool->CTX_SUFF(pVM);
4432 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4433 if (pPhysPage)
4434 {
4435 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4436#ifdef LOG_ENABLED
4437 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4438 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4439#endif
4440 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4441 {
4442 Assert(pPage->cPresent);
4443 Assert(pPool->cPresent);
4444 pPage->cPresent--;
4445 pPool->cPresent--;
4446 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4447 return;
4448 }
4449
4450 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp iPte=%u fIsNested=%RTbool\n",
4451 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage), iPte, PGMPOOL_PAGE_IS_NESTED(pPage)));
4452 }
4453 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4454}
4455
4456
4457/**
4458 * Clear references to guest physical memory.
4459 *
4460 * @param pPool The pool.
4461 * @param pPage The page.
4462 * @param HCPhys The host physical address corresponding to the guest page.
4463 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
4464 * @param iPte Shadow pte index
4465 */
4466void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4467{
4468 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4469
4470 /*
4471 * Try the hint first.
4472 */
4473 RTHCPHYS HCPhysHinted;
4474 PVMCC pVM = pPool->CTX_SUFF(pVM);
4475 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4476 if (pPhysPage)
4477 {
4478 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4479 Assert(HCPhysHinted);
4480 if (HCPhysHinted == HCPhys)
4481 {
4482 Assert(pPage->cPresent);
4483 Assert(pPool->cPresent);
4484 pPage->cPresent--;
4485 pPool->cPresent--;
4486 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4487 return;
4488 }
4489 }
4490 else
4491 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4492
4493 /*
4494 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4495 */
4496 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4497 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4498 while (pRam)
4499 {
4500 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4501 while (iPage-- > 0)
4502 {
4503 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4504 {
4505 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4506 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4507 Assert(pPage->cPresent);
4508 Assert(pPool->cPresent);
4509 pPage->cPresent--;
4510 pPool->cPresent--;
4511 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4512 return;
4513 }
4514 }
4515 pRam = pRam->CTX_SUFF(pNext);
4516 }
4517
4518 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4519}
4520
4521
4522/**
4523 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4524 *
4525 * @param pPool The pool.
4526 * @param pPage The page.
4527 * @param pShwPT The shadow page table (mapping of the page).
4528 * @param pGstPT The guest page table.
4529 */
4530DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4531{
4532 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4533 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4534 {
4535 const X86PGUINT uPte = pShwPT->a[i].u;
4536 Assert(!(uPte & RT_BIT_32(10)));
4537 if (uPte & X86_PTE_P)
4538 {
4539 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4540 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4541 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4542 if (!pPage->cPresent)
4543 break;
4544 }
4545 }
4546}
4547
4548
4549/**
4550 * Clear references to guest physical memory in a PAE / 32-bit page table.
4551 *
4552 * @param pPool The pool.
4553 * @param pPage The page.
4554 * @param pShwPT The shadow page table (mapping of the page).
4555 * @param pGstPT The guest page table (just a half one).
4556 */
4557DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4558{
4559 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4560 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4561 {
4562 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4563 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4564 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4565 {
4566 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4567 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4568 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4569 if (!pPage->cPresent)
4570 break;
4571 }
4572 }
4573}
4574
4575
4576/**
4577 * Clear references to guest physical memory in a PAE / PAE page table.
4578 *
4579 * @param pPool The pool.
4580 * @param pPage The page.
4581 * @param pShwPT The shadow page table (mapping of the page).
4582 * @param pGstPT The guest page table.
4583 */
4584DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4585{
4586 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4587 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4588 {
4589 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4590 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4591 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4592 {
4593 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
4594 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4595 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4596 if (!pPage->cPresent)
4597 break;
4598 }
4599 }
4600}
4601
4602
4603/**
4604 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4605 *
4606 * @param pPool The pool.
4607 * @param pPage The page.
4608 * @param pShwPT The shadow page table (mapping of the page).
4609 */
4610DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4611{
4612 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4613 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4614 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4615 {
4616 const X86PGUINT uPte = pShwPT->a[i].u;
4617 Assert(!(uPte & RT_BIT_32(10)));
4618 if (uPte & X86_PTE_P)
4619 {
4620 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4621 i, uPte & X86_PTE_PG_MASK, GCPhys));
4622 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4623 if (!pPage->cPresent)
4624 break;
4625 }
4626 }
4627}
4628
4629
4630/**
4631 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4632 *
4633 * @param pPool The pool.
4634 * @param pPage The page.
4635 * @param pShwPT The shadow page table (mapping of the page).
4636 */
4637DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4638{
4639 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4640 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4641 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4642 {
4643 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4644 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4645 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4646 {
4647 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4648 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4649 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4650 if (!pPage->cPresent)
4651 break;
4652 }
4653 }
4654}
4655
4656
4657/**
4658 * Clear references to shadowed pages in an EPT page table.
4659 *
4660 * @param pPool The pool.
4661 * @param pPage The page.
4662 * @param pShwPT The shadow page directory pointer table (mapping of the
4663 * page).
4664 */
4665DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4666{
4667 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4668 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4669 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4670 {
4671 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4672 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4673 if (uPte & EPT_E_READ)
4674 {
4675 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4676 i, uPte & EPT_PTE_PG_MASK, pPage->GCPhys));
4677 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4678 if (!pPage->cPresent)
4679 break;
4680 }
4681 }
4682}
4683
4684#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4685
4686/**
4687 * Clears references to shadowed pages in a SLAT EPT page table.
4688 *
4689 * @param pPool The pool.
4690 * @param pPage The page.
4691 * @param pShwPT The shadow page table (mapping of the page).
4692 * @param pGstPT The guest page table.
4693 */
4694DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT, PCEPTPT pGstPT)
4695{
4696 Assert(PGMPOOL_PAGE_IS_NESTED(pPage));
4697 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4698 {
4699 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4700 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4701 if (uShwPte & EPT_PRESENT_MASK)
4702 {
4703 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, pGstPT->a[i].u));
4704 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, pGstPT->a[i].u & EPT_PTE_PG_MASK, i);
4705 if (!pPage->cPresent)
4706 break;
4707 }
4708 }
4709}
4710
4711
4712/**
4713 * Clear references to guest physical memory in a SLAT 2MB EPT page table.
4714 *
4715 * @param pPool The pool.
4716 * @param pPage The page.
4717 * @param pShwPT The shadow page table (mapping of the page).
4718 */
4719DECLINLINE(void) pgmPoolTrackDerefNestedPTEPT2MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4720{
4721 Assert(pPage->fA20Enabled);
4722 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4723 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4724 {
4725 X86PGPAEUINT const uShwPte = pShwPT->a[i].u;
4726 Assert((uShwPte & UINT64_C(0xfff0000000000f80)) == 0); /* Access, Dirty, UserX (not supported) and ignored bits 7, 11. */
4727 if (uShwPte & EPT_PRESENT_MASK)
4728 {
4729 Log7Func(("Shw=%RX64 GstPte=%RX64\n", uShwPte, GCPhys));
4730 pgmPoolTracDerefGCPhys(pPool, pPage, uShwPte & EPT_PTE_PG_MASK, GCPhys, i);
4731 if (!pPage->cPresent)
4732 break;
4733 }
4734 }
4735}
4736
4737
4738/**
4739 * Clear references to shadowed pages in a SLAT EPT page directory.
4740 *
4741 * @param pPool The pool.
4742 * @param pPage The page.
4743 * @param pShwPD The shadow page directory (mapping of the page).
4744 * @param pGstPD The guest page directory.
4745 */
4746DECLINLINE(void) pgmPoolTrackDerefNestedPDEpt(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD, PCEPTPD pGstPD)
4747{
4748 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4749 {
4750 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4751#ifdef PGM_WITH_LARGE_PAGES
4752 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4753#else
4754 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4755#endif
4756 if (uPde & EPT_PRESENT_MASK)
4757 {
4758#ifdef PGM_WITH_LARGE_PAGES
4759 if (uPde & EPT_E_LEAF)
4760 {
4761 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n", i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4762 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK, pGstPD->a[i].u & EPT_PDE2M_PG_MASK, i);
4763 }
4764 else
4765#endif
4766 {
4767 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4768 if (pSubPage)
4769 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4770 else
4771 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4772 }
4773 }
4774 }
4775}
4776
4777#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
4778
4779
4780/**
4781 * Clear references to shadowed pages in a 32 bits page directory.
4782 *
4783 * @param pPool The pool.
4784 * @param pPage The page.
4785 * @param pShwPD The shadow page directory (mapping of the page).
4786 */
4787DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4788{
4789 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4790 {
4791 X86PGUINT const uPde = pShwPD->a[i].u;
4792 if (uPde & X86_PDE_P)
4793 {
4794 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4795 if (pSubPage)
4796 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4797 else
4798 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4799 }
4800 }
4801}
4802
4803
4804/**
4805 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4806 *
4807 * @param pPool The pool.
4808 * @param pPage The page.
4809 * @param pShwPD The shadow page directory (mapping of the page).
4810 */
4811DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4812{
4813 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4814 {
4815 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4816 if (uPde & X86_PDE_P)
4817 {
4818#ifdef PGM_WITH_LARGE_PAGES
4819 if (uPde & X86_PDE_PS)
4820 {
4821 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4822 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4823 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4824 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4825 i);
4826 }
4827 else
4828#endif
4829 {
4830 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4831 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4832 if (pSubPage)
4833 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4834 else
4835 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4836 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4837 }
4838 }
4839 }
4840}
4841
4842
4843/**
4844 * Clear references to shadowed pages in a PAE page directory pointer table.
4845 *
4846 * @param pPool The pool.
4847 * @param pPage The page.
4848 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4849 */
4850DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4851{
4852 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4853 {
4854 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4855 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4856 if (uPdpe & X86_PDPE_P)
4857 {
4858 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4859 if (pSubPage)
4860 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4861 else
4862 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4863 }
4864 }
4865}
4866
4867
4868/**
4869 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4870 *
4871 * @param pPool The pool.
4872 * @param pPage The page.
4873 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4874 */
4875DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4876{
4877 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4878 {
4879 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4880 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4881 if (uPdpe & X86_PDPE_P)
4882 {
4883 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4884 if (pSubPage)
4885 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4886 else
4887 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4888 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4889 }
4890 }
4891}
4892
4893
4894/**
4895 * Clear references to shadowed pages in a 64-bit level 4 page table.
4896 *
4897 * @param pPool The pool.
4898 * @param pPage The page.
4899 * @param pShwPML4 The shadow page directory pointer table (mapping of the page).
4900 */
4901DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4902{
4903 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4904 {
4905 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4906 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4907 if (uPml4e & X86_PML4E_P)
4908 {
4909 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4910 if (pSubPage)
4911 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4912 else
4913 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4914 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4915 }
4916 }
4917}
4918
4919
4920/**
4921 * Clear references to shadowed pages in an EPT page directory.
4922 *
4923 * @param pPool The pool.
4924 * @param pPage The page.
4925 * @param pShwPD The shadow page directory (mapping of the page).
4926 */
4927DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4928{
4929 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4930 {
4931 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4932#ifdef PGM_WITH_LARGE_PAGES
4933 AssertMsg((uPde & UINT64_C(0xfff0000000000f00)) == 0, ("uPde=%RX64\n", uPde));
4934#else
4935 AssertMsg((uPde & UINT64_C(0xfff0000000000f80)) == 0, ("uPde=%RX64\n", uPde));
4936#endif
4937 if (uPde & EPT_E_READ)
4938 {
4939#ifdef PGM_WITH_LARGE_PAGES
4940 if (uPde & EPT_E_LEAF)
4941 {
4942 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4943 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4944 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4945 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4946 i);
4947 }
4948 else
4949#endif
4950 {
4951 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4952 if (pSubPage)
4953 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4954 else
4955 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4956 }
4957 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4958 }
4959 }
4960}
4961
4962
4963/**
4964 * Clear references to shadowed pages in an EPT page directory pointer table.
4965 *
4966 * @param pPool The pool.
4967 * @param pPage The page.
4968 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4969 */
4970DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4971{
4972 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4973 {
4974 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4975 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4976 if (uPdpe & EPT_E_READ)
4977 {
4978 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4979 if (pSubPage)
4980 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4981 else
4982 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4983 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4984 }
4985 }
4986}
4987
4988
4989/**
4990 * Clears all references made by this page.
4991 *
4992 * This includes other shadow pages and GC physical addresses.
4993 *
4994 * @param pPool The pool.
4995 * @param pPage The page.
4996 */
4997static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4998{
4999 /*
5000 * Map the shadow page and take action according to the page kind.
5001 */
5002 PVMCC pVM = pPool->CTX_SUFF(pVM);
5003 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5004 switch (pPage->enmKind)
5005 {
5006 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5007 {
5008 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5009 void *pvGst;
5010 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5011 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
5012 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5013 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5014 break;
5015 }
5016
5017 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5018 {
5019 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5020 void *pvGst;
5021 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5022 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
5023 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5024 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5025 break;
5026 }
5027
5028 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5029 {
5030 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5031 void *pvGst;
5032 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5033 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
5034 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
5035 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5036 break;
5037 }
5038
5039 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
5040 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5041 {
5042 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5043 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
5044 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5045 break;
5046 }
5047
5048 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
5049 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5050 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5051 {
5052 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
5053 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
5054 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
5055 break;
5056 }
5057
5058 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5059 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5060 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5061 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5062 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5063 case PGMPOOLKIND_PAE_PD_PHYS:
5064 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5065 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5066 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
5067 break;
5068
5069 case PGMPOOLKIND_32BIT_PD_PHYS:
5070 case PGMPOOLKIND_32BIT_PD:
5071 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
5072 break;
5073
5074 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5075 case PGMPOOLKIND_PAE_PDPT:
5076 case PGMPOOLKIND_PAE_PDPT_PHYS:
5077 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
5078 break;
5079
5080 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5081 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5082 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
5083 break;
5084
5085 case PGMPOOLKIND_64BIT_PML4:
5086 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
5087 break;
5088
5089 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5090 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
5091 break;
5092
5093 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5094 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
5095 break;
5096
5097 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5098 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5099 break;
5100
5101#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
5102 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5103 {
5104 void *pvGst;
5105 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5106 pgmPoolTrackDerefNestedPTEPT(pPool, pPage, (PEPTPT)pvShw, (PCEPTPT)pvGst);
5107 break;
5108 }
5109
5110 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5111 pgmPoolTrackDerefNestedPTEPT2MB(pPool, pPage, (PEPTPT)pvShw);
5112 break;
5113
5114 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5115 {
5116 void *pvGst;
5117 int const rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
5118 pgmPoolTrackDerefNestedPDEpt(pPool, pPage, (PEPTPD)pvShw, (PCEPTPD)pvGst);
5119 break;
5120 }
5121
5122 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5123 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
5124 break;
5125#endif
5126
5127 default:
5128 AssertFatalMsgFailed(("enmKind=%d GCPhys=%RGp\n", pPage->enmKind, pPage->GCPhys));
5129 }
5130
5131 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
5132 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5133 ASMMemZeroPage(pvShw);
5134 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5135 pPage->fZeroed = true;
5136 Assert(!pPage->cPresent);
5137 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
5138}
5139
5140
5141/**
5142 * Flushes a pool page.
5143 *
5144 * This moves the page to the free list after removing all user references to it.
5145 *
5146 * @returns VBox status code.
5147 * @retval VINF_SUCCESS on success.
5148 * @param pPool The pool.
5149 * @param pPage The shadow page.
5150 * @param fFlush Flush the TLBS when required (should only be false in very specific use cases!!)
5151 */
5152int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
5153{
5154 PVMCC pVM = pPool->CTX_SUFF(pVM);
5155 bool fFlushRequired = false;
5156
5157 int rc = VINF_SUCCESS;
5158 STAM_PROFILE_START(&pPool->StatFlushPage, f);
5159 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5160 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5161
5162 if (PGMPOOL_PAGE_IS_NESTED(pPage))
5163 Log7Func(("pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
5164 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
5165
5166 /*
5167 * Reject any attempts at flushing any of the special root pages (shall
5168 * not happen).
5169 */
5170 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
5171 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
5172 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
5173 VINF_SUCCESS);
5174
5175 PGM_LOCK_VOID(pVM);
5176
5177 /*
5178 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
5179 */
5180 if (pgmPoolIsPageLocked(pPage))
5181 {
5182#if !defined(VBOX_VMM_TARGET_ARMV8)
5183 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
5184 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
5185 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
5186 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
5187 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
5188 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
5189 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
5190 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
5191 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
5192 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
5193 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
5194#endif
5195 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
5196 PGM_UNLOCK(pVM);
5197 return VINF_SUCCESS;
5198 }
5199
5200 /*
5201 * Mark the page as being in need of an ASMMemZeroPage().
5202 */
5203 pPage->fZeroed = false;
5204
5205#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5206 if (pPage->fDirty)
5207 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
5208#endif
5209
5210 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
5211 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
5212 fFlushRequired = true;
5213
5214 /*
5215 * Clear the page.
5216 */
5217 pgmPoolTrackClearPageUsers(pPool, pPage);
5218 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
5219 pgmPoolTrackDeref(pPool, pPage);
5220 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
5221
5222 /*
5223 * Flush it from the cache.
5224 */
5225 pgmPoolCacheFlushPage(pPool, pPage);
5226
5227 /*
5228 * Deregistering the monitoring.
5229 */
5230 if (pPage->fMonitored)
5231 rc = pgmPoolMonitorFlush(pPool, pPage);
5232
5233 /*
5234 * Free the page.
5235 */
5236 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
5237 pPage->iNext = pPool->iFreeHead;
5238 pPool->iFreeHead = pPage->idx;
5239 pPage->enmKind = PGMPOOLKIND_FREE;
5240 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5241 pPage->GCPhys = NIL_RTGCPHYS;
5242 pPage->fReusedFlushPending = false;
5243
5244 pPool->cUsedPages--;
5245
5246 /* Flush the TLBs of all VCPUs if required. */
5247 if ( fFlushRequired
5248 && fFlush)
5249 {
5250 PGM_INVL_ALL_VCPU_TLBS(pVM);
5251 }
5252
5253 PGM_UNLOCK(pVM);
5254 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
5255 return rc;
5256}
5257
5258
5259/**
5260 * Frees a usage of a pool page.
5261 *
5262 * The caller is responsible to updating the user table so that it no longer
5263 * references the shadow page.
5264 *
5265 * @param pPool The pool.
5266 * @param pPage The shadow page.
5267 * @param iUser The shadow page pool index of the user table.
5268 * NIL_PGMPOOL_IDX for root pages.
5269 * @param iUserTable The index into the user table (shadowed). Ignored if
5270 * root page.
5271 */
5272void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
5273{
5274 PVMCC pVM = pPool->CTX_SUFF(pVM);
5275
5276 STAM_PROFILE_START(&pPool->StatFree, a);
5277 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
5278 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
5279 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
5280
5281 PGM_LOCK_VOID(pVM);
5282 if (iUser != NIL_PGMPOOL_IDX)
5283 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
5284 if (!pPage->fCached)
5285 pgmPoolFlushPage(pPool, pPage);
5286 PGM_UNLOCK(pVM);
5287 STAM_PROFILE_STOP(&pPool->StatFree, a);
5288}
5289
5290
5291/**
5292 * Makes one or more free page free.
5293 *
5294 * @returns VBox status code.
5295 * @retval VINF_SUCCESS on success.
5296 *
5297 * @param pPool The pool.
5298 * @param enmKind Page table kind
5299 * @param iUser The user of the page.
5300 */
5301static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
5302{
5303 PVMCC pVM = pPool->CTX_SUFF(pVM);
5304 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
5305 NOREF(enmKind);
5306
5307 /*
5308 * If the pool isn't full grown yet, expand it.
5309 */
5310 if (pPool->cCurPages < pPool->cMaxPages)
5311 {
5312 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
5313#ifdef IN_RING3
5314 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
5315#else
5316 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
5317#endif
5318 if (RT_FAILURE(rc))
5319 return rc;
5320 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
5321 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
5322 return VINF_SUCCESS;
5323 }
5324
5325 /*
5326 * Free one cached page.
5327 */
5328 return pgmPoolCacheFreeOne(pPool, iUser);
5329}
5330
5331
5332/**
5333 * Allocates a page from the pool.
5334 *
5335 * This page may actually be a cached page and not in need of any processing
5336 * on the callers part.
5337 *
5338 * @returns VBox status code.
5339 * @retval VINF_SUCCESS if a NEW page was allocated.
5340 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
5341 *
5342 * @param pVM The cross context VM structure.
5343 * @param GCPhys The GC physical address of the page we're gonna shadow.
5344 * For 4MB and 2MB PD entries, it's the first address the
5345 * shadow PT is covering.
5346 * @param enmKind The kind of mapping.
5347 * @param enmAccess Access type for the mapping (only relevant for big pages)
5348 * @param fA20Enabled Whether the A20 gate is enabled or not.
5349 * @param iUser The shadow page pool index of the user table. Root
5350 * pages should pass NIL_PGMPOOL_IDX.
5351 * @param iUserTable The index into the user table (shadowed). Ignored for
5352 * root pages (iUser == NIL_PGMPOOL_IDX).
5353 * @param fLockPage Lock the page
5354 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
5355 */
5356int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
5357 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
5358{
5359 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5360 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
5361 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
5362 *ppPage = NULL;
5363 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
5364 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
5365 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
5366
5367#if defined(VBOX_STRICT) && defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT)
5368 PVMCPUCC pVCpu = VMMGetCpu(pVM);
5369 Assert(pVCpu->pgm.s.enmGuestSlatMode == PGMSLAT_DIRECT || PGMPOOL_PAGE_IS_KIND_NESTED(enmKind));
5370#endif
5371
5372 PGM_LOCK_VOID(pVM);
5373
5374 if (pPool->fCacheEnabled)
5375 {
5376 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
5377 if (RT_SUCCESS(rc2))
5378 {
5379 if (fLockPage)
5380 pgmPoolLockPage(pPool, *ppPage);
5381 PGM_UNLOCK(pVM);
5382 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5383 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5384 return rc2;
5385 }
5386 }
5387
5388 /*
5389 * Allocate a new one.
5390 */
5391 int rc = VINF_SUCCESS;
5392 uint16_t iNew = pPool->iFreeHead;
5393 if (iNew == NIL_PGMPOOL_IDX)
5394 {
5395 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5396 if (RT_FAILURE(rc))
5397 {
5398 PGM_UNLOCK(pVM);
5399 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5400 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5401 return rc;
5402 }
5403 iNew = pPool->iFreeHead;
5404 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5405 }
5406
5407 /* unlink the free head */
5408 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5409 pPool->iFreeHead = pPage->iNext;
5410 pPage->iNext = NIL_PGMPOOL_IDX;
5411
5412 /*
5413 * Initialize it.
5414 */
5415 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5416 pPage->enmKind = enmKind;
5417 pPage->enmAccess = enmAccess;
5418 pPage->GCPhys = GCPhys;
5419 pPage->fA20Enabled = fA20Enabled;
5420 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5421 pPage->fMonitored = false;
5422 pPage->fCached = false;
5423 pPage->fDirty = false;
5424 pPage->fReusedFlushPending = false;
5425 pPage->cModifications = 0;
5426 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5427 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5428 pPage->cPresent = 0;
5429 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5430 pPage->idxDirtyEntry = 0;
5431 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5432 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5433 pPage->cLastAccessHandler = 0;
5434 pPage->cLocked = 0;
5435# ifdef VBOX_STRICT
5436 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5437# endif
5438
5439 /*
5440 * Insert into the tracking and cache. If this fails, free the page.
5441 */
5442 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5443 if (RT_FAILURE(rc3))
5444 {
5445 pPool->cUsedPages--;
5446 pPage->enmKind = PGMPOOLKIND_FREE;
5447 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5448 pPage->GCPhys = NIL_RTGCPHYS;
5449 pPage->iNext = pPool->iFreeHead;
5450 pPool->iFreeHead = pPage->idx;
5451 PGM_UNLOCK(pVM);
5452 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5453 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5454 return rc3;
5455 }
5456
5457 /*
5458 * Commit the allocation, clear the page and return.
5459 */
5460#ifdef VBOX_WITH_STATISTICS
5461 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5462 pPool->cUsedPagesHigh = pPool->cUsedPages;
5463#endif
5464
5465 if (!pPage->fZeroed)
5466 {
5467 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5468 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5469 ASMMemZeroPage(pv);
5470 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5471 }
5472
5473 *ppPage = pPage;
5474 if (fLockPage)
5475 pgmPoolLockPage(pPool, pPage);
5476 PGM_UNLOCK(pVM);
5477 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5478 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5479 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5480 return rc;
5481}
5482
5483
5484/**
5485 * Frees a usage of a pool page.
5486 *
5487 * @param pVM The cross context VM structure.
5488 * @param HCPhys The HC physical address of the shadow page.
5489 * @param iUser The shadow page pool index of the user table.
5490 * NIL_PGMPOOL_IDX if root page.
5491 * @param iUserTable The index into the user table (shadowed). Ignored if
5492 * root page.
5493 */
5494void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5495{
5496 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5497 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5498 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5499}
5500
5501
5502/**
5503 * Internal worker for finding a 'in-use' shadow page give by it's physical address.
5504 *
5505 * @returns Pointer to the shadow page structure.
5506 * @param pPool The pool.
5507 * @param HCPhys The HC physical address of the shadow page.
5508 */
5509PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5510{
5511 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5512
5513 /*
5514 * Look up the page.
5515 */
5516 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5517
5518 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5519 return pPage;
5520}
5521
5522
5523/**
5524 * Internal worker for finding a page for debugging purposes, no assertions.
5525 *
5526 * @returns Pointer to the shadow page structure. NULL on if not found.
5527 * @param pPool The pool.
5528 * @param HCPhys The HC physical address of the shadow page.
5529 */
5530PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5531{
5532 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5533 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5534}
5535
5536
5537/**
5538 * Internal worker for PGM_HCPHYS_2_PTR.
5539 *
5540 * @returns VBox status code.
5541 * @param pVM The cross context VM structure.
5542 * @param HCPhys The HC physical address of the shadow page.
5543 * @param ppv Where to return the address.
5544 */
5545int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5546{
5547 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5548 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5549 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5550 VERR_PGM_POOL_GET_PAGE_FAILED);
5551 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5552 return VINF_SUCCESS;
5553}
5554
5555#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5556
5557/**
5558 * Flush the specified page if present
5559 *
5560 * @param pVM The cross context VM structure.
5561 * @param GCPhys Guest physical address of the page to flush
5562 */
5563void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5564{
5565 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5566
5567 VM_ASSERT_EMT(pVM);
5568
5569 /*
5570 * Look up the GCPhys in the hash.
5571 */
5572 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5573 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5574 if (i == NIL_PGMPOOL_IDX)
5575 return;
5576
5577 do
5578 {
5579 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5580 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5581 {
5582 Assert(!PGMPOOL_PAGE_IS_NESTED(pPage)); /* Temporary to see if it hits. Remove later. */
5583 switch (pPage->enmKind)
5584 {
5585 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5586 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5587 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5588 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5589 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5590 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5591 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5592 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5593 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5594 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5595 case PGMPOOLKIND_64BIT_PML4:
5596 case PGMPOOLKIND_32BIT_PD:
5597 case PGMPOOLKIND_PAE_PDPT:
5598 {
5599 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5600# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5601 if (pPage->fDirty)
5602 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5603 else
5604# endif
5605 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5606 Assert(!pgmPoolIsPageLocked(pPage));
5607 pgmPoolMonitorChainFlush(pPool, pPage);
5608 return;
5609 }
5610
5611 /* ignore, no monitoring. */
5612 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5613 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5614 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5615 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5616 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5617 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5618 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5619 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5620 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5621 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5622 case PGMPOOLKIND_ROOT_NESTED:
5623 case PGMPOOLKIND_PAE_PD_PHYS:
5624 case PGMPOOLKIND_PAE_PDPT_PHYS:
5625 case PGMPOOLKIND_32BIT_PD_PHYS:
5626 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5627 break;
5628
5629 default:
5630 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5631 }
5632 }
5633
5634 /* next */
5635 i = pPage->iNext;
5636 } while (i != NIL_PGMPOOL_IDX);
5637 return;
5638}
5639
5640
5641/**
5642 * Reset CPU on hot plugging.
5643 *
5644 * @param pVM The cross context VM structure.
5645 * @param pVCpu The cross context virtual CPU structure.
5646 */
5647void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5648{
5649 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5650
5651 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5652 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5653 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5654}
5655
5656
5657/**
5658 * Flushes the entire cache.
5659 *
5660 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
5661 * this and execute this CR3 flush.
5662 *
5663 * @param pVM The cross context VM structure.
5664 */
5665void pgmR3PoolReset(PVM pVM)
5666{
5667 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5668
5669 PGM_LOCK_ASSERT_OWNER(pVM);
5670 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5671 LogFlow(("pgmR3PoolReset:\n"));
5672
5673 /*
5674 * If there are no pages in the pool, there is nothing to do.
5675 */
5676 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5677 {
5678 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5679 return;
5680 }
5681
5682 /*
5683 * Exit the shadow mode since we're going to clear everything,
5684 * including the root page.
5685 */
5686 VMCC_FOR_EACH_VMCPU(pVM)
5687 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5688 VMCC_FOR_EACH_VMCPU_END(pVM);
5689
5690
5691 /*
5692 * Nuke the free list and reinsert all pages into it.
5693 */
5694 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5695 {
5696 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5697
5698 if (pPage->fMonitored)
5699 pgmPoolMonitorFlush(pPool, pPage);
5700 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5701 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5702 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5703 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5704 pPage->GCPhys = NIL_RTGCPHYS;
5705 pPage->enmKind = PGMPOOLKIND_FREE;
5706 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5707 Assert(pPage->idx == i);
5708 pPage->iNext = i + 1;
5709 pPage->fA20Enabled = true;
5710 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5711 pPage->fSeenNonGlobal = false;
5712 pPage->fMonitored = false;
5713 pPage->fDirty = false;
5714 pPage->fCached = false;
5715 pPage->fReusedFlushPending = false;
5716 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5717 pPage->cPresent = 0;
5718 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5719 pPage->cModifications = 0;
5720 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5721 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5722 pPage->idxDirtyEntry = 0;
5723 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5724 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5725 pPage->cLastAccessHandler = 0;
5726 pPage->cLocked = 0;
5727# ifdef VBOX_STRICT
5728 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5729# endif
5730 }
5731 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5732 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5733 pPool->cUsedPages = 0;
5734
5735 /*
5736 * Zap and reinitialize the user records.
5737 */
5738 pPool->cPresent = 0;
5739 pPool->iUserFreeHead = 0;
5740 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5741 const unsigned cMaxUsers = pPool->cMaxUsers;
5742 for (unsigned i = 0; i < cMaxUsers; i++)
5743 {
5744 paUsers[i].iNext = i + 1;
5745 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5746 paUsers[i].iUserTable = 0xfffffffe;
5747 }
5748 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5749
5750 /*
5751 * Clear all the GCPhys links and rebuild the phys ext free list.
5752 */
5753 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5754 pRam;
5755 pRam = pRam->CTX_SUFF(pNext))
5756 {
5757 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5758 while (iPage-- > 0)
5759 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5760 }
5761
5762 pPool->iPhysExtFreeHead = 0;
5763 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5764 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5765 for (unsigned i = 0; i < cMaxPhysExts; i++)
5766 {
5767 paPhysExts[i].iNext = i + 1;
5768 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5769 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5770 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5771 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5772 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5773 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5774 }
5775 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5776
5777 /*
5778 * Just zap the modified list.
5779 */
5780 pPool->cModifiedPages = 0;
5781 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5782
5783 /*
5784 * Clear the GCPhys hash and the age list.
5785 */
5786 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5787 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5788 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5789 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5790
5791# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5792 /* Clear all dirty pages. */
5793 pPool->idxFreeDirtyPage = 0;
5794 pPool->cDirtyPages = 0;
5795 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5796 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5797# endif
5798
5799 /*
5800 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5801 */
5802 VMCC_FOR_EACH_VMCPU(pVM)
5803 {
5804 /*
5805 * Re-enter the shadowing mode and assert Sync CR3 FF.
5806 */
5807 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5808 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5809 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5810 }
5811 VMCC_FOR_EACH_VMCPU_END(pVM);
5812
5813 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5814}
5815
5816#endif /* IN_RING3 */
5817
5818#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5819/**
5820 * Stringifies a PGMPOOLKIND value.
5821 */
5822static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5823{
5824 switch ((PGMPOOLKIND)enmKind)
5825 {
5826 case PGMPOOLKIND_INVALID:
5827 return "PGMPOOLKIND_INVALID";
5828 case PGMPOOLKIND_FREE:
5829 return "PGMPOOLKIND_FREE";
5830 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5831 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5832 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5833 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5834 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5835 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5836 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5837 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5838 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5839 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5840 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5841 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5842 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5843 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5844 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5845 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5846 case PGMPOOLKIND_32BIT_PD:
5847 return "PGMPOOLKIND_32BIT_PD";
5848 case PGMPOOLKIND_32BIT_PD_PHYS:
5849 return "PGMPOOLKIND_32BIT_PD_PHYS";
5850 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5851 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5852 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5853 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5854 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5855 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5856 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5857 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5858 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5859 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5860 case PGMPOOLKIND_PAE_PD_PHYS:
5861 return "PGMPOOLKIND_PAE_PD_PHYS";
5862 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5863 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5864 case PGMPOOLKIND_PAE_PDPT:
5865 return "PGMPOOLKIND_PAE_PDPT";
5866 case PGMPOOLKIND_PAE_PDPT_PHYS:
5867 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5868 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5869 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5870 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5871 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5872 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5873 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5874 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5875 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5876 case PGMPOOLKIND_64BIT_PML4:
5877 return "PGMPOOLKIND_64BIT_PML4";
5878 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5879 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5880 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5881 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5882 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5883 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5884 case PGMPOOLKIND_ROOT_NESTED:
5885 return "PGMPOOLKIND_ROOT_NESTED";
5886 case PGMPOOLKIND_EPT_PT_FOR_EPT_PT:
5887 return "PGMPOOLKIND_EPT_PT_FOR_EPT_PT";
5888 case PGMPOOLKIND_EPT_PT_FOR_EPT_2MB:
5889 return "PGMPOOLKIND_EPT_PT_FOR_EPT_2MB";
5890 case PGMPOOLKIND_EPT_PD_FOR_EPT_PD:
5891 return "PGMPOOLKIND_EPT_PD_FOR_EPT_PD";
5892 case PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT:
5893 return "PGMPOOLKIND_EPT_PDPT_FOR_EPT_PDPT";
5894 case PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4:
5895 return "PGMPOOLKIND_EPT_PML4_FOR_EPT_PML4";
5896 }
5897 return "Unknown kind!";
5898}
5899#endif /* LOG_ENABLED || VBOX_STRICT */
5900
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette