VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPool.cpp@ 38838

Last change on this file since 38838 was 37354, checked in by vboxsync, 14 years ago

PGM: Fixed locking issues in PGMR3PhysMMIORegister and PGMR3PhysMMIODeregister. Also addressed a harmless one in PGMR3PhysRomRegister (only used at init time, so no races). Fortified the code with more lock assertions, replacing the incorrect PGMIsLocked() checks (we only care whether the current thread is the lock owner). Cleaned up some ReturnStmt macros and added more of them.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 53.7 KB
1/* $Id: PGMPool.cpp 37354 2011-06-07 15:05:32Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pgm_pool PGM Shadow Page Pool
19 *
20 * Motivations:
21 * -# Relationship between shadow page tables and physical guest pages. This
22 * should allow us to skip most of the global flushes that currently follow
23 * access handler changes. The main expense is flushing shadow pages.
24 * -# Limit the pool size if necessary (the default is kind of limitless).
25 * -# Allocate shadow pages from RC. We used to only do this in SyncCR3.
26 * -# Required for 64-bit guests.
27 * -# Combine the PD cache and page pool in order to simplify caching.
28 *
29 *
30 * @section sec_pgm_pool_outline Design Outline
31 *
32 * The shadow page pool tracks pages used for shadowing paging structures (i.e.
33 * page tables, page directory, page directory pointer table and page map
34 * level-4). Each page in the pool has a unique identifier. This identifier is
35 * used to link a guest physical page to a shadow PT. The identifier is a
36 * non-zero value and has a relatively low max value - say 14 bits. This makes it
37 * possible to fit it into the upper bits of the aHCPhys entries in the
38 * ram range.
39 *
40 * By restricting host physical memory to the first 48 bits (which is the
41 * announced physical memory range of the K8L chip (scheduled for 2008)), we
42 * can safely use the upper 16 bits for shadow page ID and reference counting.
43 *
44 * Update: The 48 bit assumption will be lifted with the new physical memory
45 * management (PGMPAGE), so we won't have any trouble when someone stuffs 2TB
46 * into a box in some years.
47 *
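 * As an illustration of the packing described above (an editorial sketch, not
 * the actual PGM code; the real encoding lives in PGMInternal.h, and idxShwPage
 * here is a hypothetical name), a 14-bit shadow page id could share the upper
 * 16 bits of a 48-bit host physical address like this:
 * @code
 *      uint64_t uEntry  = HCPhys & UINT64_C(0x0000ffffffffffff); // 48-bit address part
 *      uEntry          |= (uint64_t)idxShwPage << 48;            // 14-bit shadow page id
 *      uint16_t idx     = (uint16_t)(uEntry >> 48) & 0x3fff;     // recover the id
 *      RTHCPHYS HCPhys2 = uEntry & UINT64_C(0x0000ffffffffffff); // recover the address
 * @endcode
 *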
48 * Now, it's possible for a page to be aliased, i.e. mapped by more than one PT
49 * or PD. This is solved by creating a list of physical cross reference extents
50 * whenever this happens. Each node in the list (an extent) can contain 3 page
51 * pool indexes. The list itself is chained using indexes into the paPhysExt
52 * array.
53 *
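 * A rough picture of such an extent (editorial sketch only; the real structure
 * is PGMPOOLPHYSEXT in PGMInternal.h, which also records the PTE indexes):
 * @code
 *      typedef struct EXAMPLEPHYSEXT
 *      {
 *          uint16_t iNext;     // next extent in the chain, or NIL when last
 *          uint16_t aidx[3];   // up to three shadow page pool indexes
 *      } EXAMPLEPHYSEXT;
 *      // lookup: paPhysExt[iFirst].aidx[], then follow .iNext to the next extent, ...
 * @endcode
 *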
54 *
55 * @section sec_pgm_pool_life Life Cycle of a Shadow Page
56 *
57 * -# The SyncPT function requests a page from the pool.
58 * The request includes the kind of page it is (PT/PD, PAE/legacy), the
59 * address of the page it's shadowing, and more.
60 * -# The pool responds to the request by allocating a new page.
61 * When the cache is enabled, it will first check if it's in the cache.
62 * Should the pool be exhausted, one of two things can be done:
63 * -# Flush the whole pool and current CR3.
64 * -# Use the cache to find a page which can be flushed (~age).
65 * -# The SyncPT function will sync one or more pages and insert them into the
66 * shadow PD.
67 * -# The SyncPage function may sync more pages on later \#PFs.
68 * -# The page is freed / flushed in SyncCR3 (perhaps) and in some other cases.
69 * When caching is enabled, the page isn't flushed but remains in the cache.
70 *
71 *
72 * @section sec_pgm_pool_monitoring Monitoring
73 *
74 * We always monitor PAGE_SIZE chunks of memory. When we've got multiple shadow
75 * pages for the same PAGE_SIZE of guest memory (PAE and mixed PD/PT) the pages
76 * sharing the monitor get linked using the iMonitoredNext/Prev. The head page
77 * is the pvUser passed to the access handlers.
78 *
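 * For illustration, an access handler receiving the head page as pvUser could
 * walk the chain of co-monitored pages roughly like this (editorial sketch, not
 * the actual handler code; pPool is the shadow page pool):
 * @code
 *      PPGMPOOLPAGE pHead = (PPGMPOOLPAGE)pvUser;
 *      for (uint16_t idx = pHead->idx; idx != NIL_PGMPOOL_IDX;
 *           idx = pPool->aPages[idx].iMonitoredNext)
 *      {
 *          // inspect / update pPool->aPages[idx] ...
 *      }
 * @endcode
 *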
79 *
80 * @section sec_pgm_pool_impl Implementation
81 *
82 * The pool will take pages from the MM page pool. The tracking data
83 * (attributes, bitmaps and so on) are allocated from the hypervisor heap. The
84 * pool content can be accessed both by using the page id and the physical
85 * address (HC). The former is managed by means of an array, the latter by an
86 * offset based AVL tree.
87 *
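 * The two lookup paths can be pictured like this (editorial sketch; it assumes
 * the IPRT RTAvloHCPhysGet accessor that pairs with the RTAvloHCPhysInsert call
 * used in PGMR3PoolGrow below):
 * @code
 *      PPGMPOOLPAGE pById   = &pPool->aPages[idx];                      // by page id
 *      PPGMPOOLPAGE pByPhys = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys);
 * @endcode
 *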
88 * Flushing a pool page means that we iterate its content (we know what kind
89 * it is) and update the link information in the ram range.
90 *
91 * ...
92 */
93
94
95/*******************************************************************************
96* Header Files *
97*******************************************************************************/
98#define LOG_GROUP LOG_GROUP_PGM_POOL
99#include <VBox/vmm/pgm.h>
100#include <VBox/vmm/mm.h>
101#include "PGMInternal.h"
102#include <VBox/vmm/vm.h>
103#include "PGMInline.h"
104
105#include <VBox/log.h>
106#include <VBox/err.h>
107#include <iprt/asm.h>
108#include <iprt/string.h>
109#include <VBox/dbg.h>
110
111
112/*******************************************************************************
113* Internal Functions *
114*******************************************************************************/
115static DECLCALLBACK(int) pgmR3PoolAccessHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser);
116#ifdef VBOX_WITH_DEBUGGER
117static DECLCALLBACK(int) pgmR3PoolCmdCheck(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs);
118#endif
119
120#ifdef VBOX_WITH_DEBUGGER
121/** Command descriptors. */
122static const DBGCCMD g_aCmds[] =
123{
124 /* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, fFlags, pfnHandler, pszSyntax, pszDescription */
125 { "pgmpoolcheck", 0, 0, NULL, 0, 0, pgmR3PoolCmdCheck, "", "Check the pgm pool pages." },
126};
127#endif
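/* Editorial note: once registered in pgmR3PoolInit below, the external DBGC
   command above is invoked as '.pgmpoolcheck' from the VM debugger console;
   its handler, pgmR3PoolCmdCheck, is at the end of this file. */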
128
129/**
130 * Initializes the pool
131 *
132 * @returns VBox status code.
133 * @param pVM The VM handle.
134 */
135int pgmR3PoolInit(PVM pVM)
136{
137 int rc;
138
139 AssertCompile(NIL_PGMPOOL_IDX == 0);
140 /* pPage->cLocked is an unsigned byte. */
141 AssertCompile(VMM_MAX_CPU_COUNT <= 255);
142
143 /*
144 * Query Pool config.
145 */
146 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/PGM/Pool");
147
148 /* Default pgm pool size is 1024 pages (4MB). */
149 uint16_t cMaxPages = 1024;
150
151 /* Adjust it up relative to the RAM size, using the nested paging formula. */
152 uint64_t cbRam;
153 rc = CFGMR3QueryU64Def(CFGMR3GetRoot(pVM), "RamSize", &cbRam, 0); AssertRCReturn(rc, rc);
154 uint64_t u64MaxPages = (cbRam >> 9)
155 + (cbRam >> 18)
156 + (cbRam >> 27)
157 + 32 * PAGE_SIZE;
158 u64MaxPages >>= PAGE_SHIFT;
159 if (u64MaxPages > PGMPOOL_IDX_LAST)
160 cMaxPages = PGMPOOL_IDX_LAST;
161 else
162 cMaxPages = (uint16_t)u64MaxPages;
163
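    /* Worked example of the formula above (editorial note): for cbRam = 1 GiB the
       sum is (1G >> 9) + (1G >> 18) + (1G >> 27) + 32 * 4096
             = 2097152 + 4096 + 8 + 131072 = 2232328 bytes,
       which after the PAGE_SHIFT (12) shift gives roughly 545 pool pages, i.e.
       about 2.1 MB of shadow pages, before the MaxPages override below applies. */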
164 /** @cfgm{/PGM/Pool/MaxPages, uint16_t, #pages, 16, 0x3fff, F(ram-size)}
165 * The max size of the shadow page pool in pages. The pool will grow dynamically
166 * up to this limit.
167 */
168 rc = CFGMR3QueryU16Def(pCfg, "MaxPages", &cMaxPages, cMaxPages);
169 AssertLogRelRCReturn(rc, rc);
170 AssertLogRelMsgReturn(cMaxPages <= PGMPOOL_IDX_LAST && cMaxPages >= RT_ALIGN(PGMPOOL_IDX_FIRST, 16),
171 ("cMaxPages=%u (%#x)\n", cMaxPages, cMaxPages), VERR_INVALID_PARAMETER);
172 cMaxPages = RT_ALIGN(cMaxPages, 16);
173 if (cMaxPages > PGMPOOL_IDX_LAST)
174 cMaxPages = PGMPOOL_IDX_LAST;
175 LogRel(("PGMPool: cMaxPages=%u (u64MaxPages=%llu)\n", cMaxPages, u64MaxPages));
176
177 /** @todo
178 * We need to be much more careful with our allocation strategy here.
179 * For nested paging we don't need pool user info nor extents at all, but
180 * we can't check for nested paging here (too early during init to get a
181 * confirmation it can be used). The default for large memory configs is a
182 * bit large for shadow paging, so I've restricted the extent maximum to 8k
183 * (8k * 16 = 128k of hyper heap).
184 *
185 * Also when large page support is enabled, we typically don't need so much,
186 * although that depends on the availability of 2 MB chunks on the host.
187 */
188
189 /** @cfgm{/PGM/Pool/MaxUsers, uint16_t, #users, MaxUsers, 32K, MaxPages*2}
190 * The max number of shadow page user tracking records. Each shadow page has
191 * zero or more other shadow pages (or CR3s) that reference it, or use it if you
192 * like. The structures describing these relationships are allocated from a
193 * fixed sized pool. This configuration variable defines the pool size.
194 */
195 uint16_t cMaxUsers;
196 rc = CFGMR3QueryU16Def(pCfg, "MaxUsers", &cMaxUsers, cMaxPages * 2);
197 AssertLogRelRCReturn(rc, rc);
198 AssertLogRelMsgReturn(cMaxUsers >= cMaxPages && cMaxUsers <= _32K,
199 ("cMaxUsers=%u (%#x)\n", cMaxUsers, cMaxUsers), VERR_INVALID_PARAMETER);
200
201 /** @cfgm{/PGM/Pool/MaxPhysExts, uint16_t, #extents, 16, MaxPages * 2, MIN(MaxPages*2,8192)}
202 * The max number of extents for tracking aliased guest pages.
203 */
204 uint16_t cMaxPhysExts;
205 rc = CFGMR3QueryU16Def(pCfg, "MaxPhysExts", &cMaxPhysExts,
206 RT_MIN(cMaxPages * 2, 8192 /* 8Ki max as this eats too much hyper heap */));
207 AssertLogRelRCReturn(rc, rc);
208 AssertLogRelMsgReturn(cMaxPhysExts >= 16 && cMaxPhysExts <= PGMPOOL_IDX_LAST,
209 ("cMaxPhysExts=%u (%#x)\n", cMaxPhysExts, cMaxPhysExts), VERR_INVALID_PARAMETER);
210
211 /** @cfgm{/PGM/Pool/CacheEnabled, bool, true}
212 * Enables or disables caching of shadow pages. Caching means that we will try to
213 * reuse shadow pages instead of recreating them every time SyncCR3, SyncPT or
214 * SyncPage requests one. When reusing a shadow page, we can save time
215 * reconstructing it and its children.
216 */
217 bool fCacheEnabled;
218 rc = CFGMR3QueryBoolDef(pCfg, "CacheEnabled", &fCacheEnabled, true);
219 AssertLogRelRCReturn(rc, rc);
220
221 LogRel(("pgmR3PoolInit: cMaxPages=%#RX16 cMaxUsers=%#RX16 cMaxPhysExts=%#RX16 fCacheEnable=%RTbool\n",
222 cMaxPages, cMaxUsers, cMaxPhysExts, fCacheEnabled));
223
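    /* Editorial note: the /PGM/Pool/* keys queried above live in the VM's CFGM
       tree. From the host they can typically be overridden via extra data, which
       VirtualBox maps onto CFGM under the VBoxInternal prefix, e.g.:
           VBoxManage setextradata "MyVM" "VBoxInternal/PGM/Pool/MaxPages" 4096   */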
224 /*
225 * Allocate the data structures.
226 */
227 uint32_t cb = RT_OFFSETOF(PGMPOOL, aPages[cMaxPages]);
228 cb += cMaxUsers * sizeof(PGMPOOLUSER);
229 cb += cMaxPhysExts * sizeof(PGMPOOLPHYSEXT);
230 PPGMPOOL pPool;
231 rc = MMR3HyperAllocOnceNoRel(pVM, cb, 0, MM_TAG_PGM_POOL, (void **)&pPool);
232 if (RT_FAILURE(rc))
233 return rc;
234 pVM->pgm.s.pPoolR3 = pPool;
235 pVM->pgm.s.pPoolR0 = MMHyperR3ToR0(pVM, pPool);
236 pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pPool);
237
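    /* Editorial note: the single hyper-heap block allocated above is laid out as
          [PGMPOOL + aPages[cMaxPages]] [PGMPOOLUSER x cMaxUsers] [PGMPOOLPHYSEXT x cMaxPhysExts]
       which is why paUsers and paPhysExts are derived from aPages just below. */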
238 /*
239 * Initialize it.
240 */
241 pPool->pVMR3 = pVM;
242 pPool->pVMR0 = pVM->pVMR0;
243 pPool->pVMRC = pVM->pVMRC;
244 pPool->cMaxPages = cMaxPages;
245 pPool->cCurPages = PGMPOOL_IDX_FIRST;
246 pPool->iUserFreeHead = 0;
247 pPool->cMaxUsers = cMaxUsers;
248 PPGMPOOLUSER paUsers = (PPGMPOOLUSER)&pPool->aPages[pPool->cMaxPages];
249 pPool->paUsersR3 = paUsers;
250 pPool->paUsersR0 = MMHyperR3ToR0(pVM, paUsers);
251 pPool->paUsersRC = MMHyperR3ToRC(pVM, paUsers);
252 for (unsigned i = 0; i < cMaxUsers; i++)
253 {
254 paUsers[i].iNext = i + 1;
255 paUsers[i].iUser = NIL_PGMPOOL_IDX;
256 paUsers[i].iUserTable = 0xfffffffe;
257 }
258 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
259 pPool->iPhysExtFreeHead = 0;
260 pPool->cMaxPhysExts = cMaxPhysExts;
261 PPGMPOOLPHYSEXT paPhysExts = (PPGMPOOLPHYSEXT)&paUsers[cMaxUsers];
262 pPool->paPhysExtsR3 = paPhysExts;
263 pPool->paPhysExtsR0 = MMHyperR3ToR0(pVM, paPhysExts);
264 pPool->paPhysExtsRC = MMHyperR3ToRC(pVM, paPhysExts);
265 for (unsigned i = 0; i < cMaxPhysExts; i++)
266 {
267 paPhysExts[i].iNext = i + 1;
268 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
269 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
270 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
271 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
272 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
273 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
274 }
275 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
276 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
277 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
278 pPool->iAgeHead = NIL_PGMPOOL_IDX;
279 pPool->iAgeTail = NIL_PGMPOOL_IDX;
280 pPool->fCacheEnabled = fCacheEnabled;
281 pPool->pfnAccessHandlerR3 = pgmR3PoolAccessHandler;
282 pPool->pszAccessHandler = "Guest Paging Access Handler";
283 pPool->HCPhysTree = 0;
284
285 /* The NIL entry. */
286 Assert(NIL_PGMPOOL_IDX == 0);
287 pPool->aPages[NIL_PGMPOOL_IDX].enmKind = PGMPOOLKIND_INVALID;
288
289 /* The Shadow 32-bit PD. (32 bits guest paging) */
290 pPool->aPages[PGMPOOL_IDX_PD].Core.Key = NIL_RTHCPHYS;
291 pPool->aPages[PGMPOOL_IDX_PD].GCPhys = NIL_RTGCPHYS;
292 pPool->aPages[PGMPOOL_IDX_PD].pvPageR3 = 0;
293 pPool->aPages[PGMPOOL_IDX_PD].enmKind = PGMPOOLKIND_32BIT_PD;
294 pPool->aPages[PGMPOOL_IDX_PD].idx = PGMPOOL_IDX_PD;
295
296 /* The Shadow PDPT. */
297 pPool->aPages[PGMPOOL_IDX_PDPT].Core.Key = NIL_RTHCPHYS;
298 pPool->aPages[PGMPOOL_IDX_PDPT].GCPhys = NIL_RTGCPHYS;
299 pPool->aPages[PGMPOOL_IDX_PDPT].pvPageR3 = 0;
300 pPool->aPages[PGMPOOL_IDX_PDPT].enmKind = PGMPOOLKIND_PAE_PDPT;
301 pPool->aPages[PGMPOOL_IDX_PDPT].idx = PGMPOOL_IDX_PDPT;
302
303 /* The Shadow AMD64 CR3. */
304 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].Core.Key = NIL_RTHCPHYS;
305 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].GCPhys = NIL_RTGCPHYS;
306 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].pvPageR3 = 0;
307 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].enmKind = PGMPOOLKIND_64BIT_PML4;
308 pPool->aPages[PGMPOOL_IDX_AMD64_CR3].idx = PGMPOOL_IDX_AMD64_CR3;
309
310 /* The Nested Paging CR3. */
311 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].Core.Key = NIL_RTHCPHYS;
312 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].GCPhys = NIL_RTGCPHYS;
313 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].pvPageR3 = 0;
314 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].enmKind = PGMPOOLKIND_ROOT_NESTED;
315 pPool->aPages[PGMPOOL_IDX_NESTED_ROOT].idx = PGMPOOL_IDX_NESTED_ROOT;
316
317 /*
318 * Set common stuff.
319 */
320 for (unsigned iPage = 1; iPage < PGMPOOL_IDX_FIRST; iPage++)
321 {
322 pPool->aPages[iPage].iNext = NIL_PGMPOOL_IDX;
323 pPool->aPages[iPage].iUserHead = NIL_PGMPOOL_USER_INDEX;
324 pPool->aPages[iPage].iModifiedNext = NIL_PGMPOOL_IDX;
325 pPool->aPages[iPage].iModifiedPrev = NIL_PGMPOOL_IDX;
326 pPool->aPages[iPage].iMonitoredNext = NIL_PGMPOOL_IDX;
327 pPool->aPages[iPage].iMonitoredPrev = NIL_PGMPOOL_IDX;
328 pPool->aPages[iPage].iAgeNext = NIL_PGMPOOL_IDX;
329 pPool->aPages[iPage].iAgePrev = NIL_PGMPOOL_IDX;
330 Assert(pPool->aPages[iPage].idx == iPage);
331 Assert(pPool->aPages[iPage].GCPhys == NIL_RTGCPHYS);
332 Assert(!pPool->aPages[iPage].fSeenNonGlobal);
333 Assert(!pPool->aPages[iPage].fMonitored);
334 Assert(!pPool->aPages[iPage].fCached);
335 Assert(!pPool->aPages[iPage].fZeroed);
336 Assert(!pPool->aPages[iPage].fReusedFlushPending);
337 }
338
339#ifdef VBOX_WITH_STATISTICS
340 /*
341 * Register statistics.
342 */
343 STAM_REG(pVM, &pPool->cCurPages, STAMTYPE_U16, "/PGM/Pool/cCurPages", STAMUNIT_PAGES, "Current pool size.");
344 STAM_REG(pVM, &pPool->cMaxPages, STAMTYPE_U16, "/PGM/Pool/cMaxPages", STAMUNIT_PAGES, "Max pool size.");
345 STAM_REG(pVM, &pPool->cUsedPages, STAMTYPE_U16, "/PGM/Pool/cUsedPages", STAMUNIT_PAGES, "The number of pages currently in use.");
346 STAM_REG(pVM, &pPool->cUsedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/cUsedPagesHigh", STAMUNIT_PAGES, "The high watermark for cUsedPages.");
347 STAM_REG(pVM, &pPool->StatAlloc, STAMTYPE_PROFILE_ADV, "/PGM/Pool/Alloc", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolAlloc.");
348 STAM_REG(pVM, &pPool->StatClearAll, STAMTYPE_PROFILE, "/PGM/Pool/ClearAll", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolClearAll.");
349 STAM_REG(pVM, &pPool->StatR3Reset, STAMTYPE_PROFILE, "/PGM/Pool/R3Reset", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmR3PoolReset.");
350 STAM_REG(pVM, &pPool->StatFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFlushPage.");
351 STAM_REG(pVM, &pPool->StatFree, STAMTYPE_PROFILE, "/PGM/Pool/Free", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolFree.");
352 STAM_REG(pVM, &pPool->StatForceFlushPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForce", STAMUNIT_OCCURENCES, "Counting explicit flushes by PGMPoolFlushPage().");
353 STAM_REG(pVM, &pPool->StatForceFlushDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/FlushForceDirty", STAMUNIT_OCCURENCES, "Counting explicit flushes of dirty pages by PGMPoolFlushPage().");
354 STAM_REG(pVM, &pPool->StatForceFlushReused, STAMTYPE_COUNTER, "/PGM/Pool/FlushReused", STAMUNIT_OCCURENCES, "Counting flushes for reused pages.");
355 STAM_REG(pVM, &pPool->StatZeroPage, STAMTYPE_PROFILE, "/PGM/Pool/ZeroPage", STAMUNIT_TICKS_PER_CALL, "Profiling time spent zeroing pages. Overlaps with Alloc.");
356 STAM_REG(pVM, &pPool->cMaxUsers, STAMTYPE_U16, "/PGM/Pool/Track/cMaxUsers", STAMUNIT_COUNT, "Max user tracking records.");
357 STAM_REG(pVM, &pPool->cPresent, STAMTYPE_U32, "/PGM/Pool/Track/cPresent", STAMUNIT_COUNT, "Number of present page table entries.");
358 STAM_REG(pVM, &pPool->StatTrackDeref, STAMTYPE_PROFILE, "/PGM/Pool/Track/Deref", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackDeref.");
359 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPT, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPT", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPT.");
360 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTs, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTs", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTs.");
361 STAM_REG(pVM, &pPool->StatTrackFlushGCPhysPTsSlow, STAMTYPE_PROFILE, "/PGM/Pool/Track/FlushGCPhysPTsSlow", STAMUNIT_TICKS_PER_CALL, "Profiling of pgmPoolTrackFlushGCPhysPTsSlow.");
362 STAM_REG(pVM, &pPool->StatTrackFlushEntry, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Flush", STAMUNIT_COUNT, "Nr of flushed entries.");
363 STAM_REG(pVM, &pPool->StatTrackFlushEntryKeep, STAMTYPE_COUNTER, "/PGM/Pool/Track/Entry/Update", STAMUNIT_COUNT, "Nr of updated entries.");
364 STAM_REG(pVM, &pPool->StatTrackFreeUpOneUser, STAMTYPE_COUNTER, "/PGM/Pool/Track/FreeUpOneUser", STAMUNIT_TICKS_PER_CALL, "The number of times we were out of user tracking records.");
365 STAM_REG(pVM, &pPool->StatTrackDerefGCPhys, STAMTYPE_PROFILE, "/PGM/Pool/Track/DrefGCPhys", STAMUNIT_TICKS_PER_CALL, "Profiling deref activity related to tracking GC physical pages.");
366 STAM_REG(pVM, &pPool->StatTrackLinearRamSearches, STAMTYPE_COUNTER, "/PGM/Pool/Track/LinearRamSearches", STAMUNIT_OCCURENCES, "The number of times we had to do linear ram searches.");
367 STAM_REG(pVM, &pPool->StamTrackPhysExtAllocFailures,STAMTYPE_COUNTER, "/PGM/Pool/Track/PhysExtAllocFailures", STAMUNIT_OCCURENCES, "The number of failing pgmPoolTrackPhysExtAlloc calls.");
368 STAM_REG(pVM, &pPool->StatMonitorRZ, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 access handler.");
369 STAM_REG(pVM, &pPool->StatMonitorRZEmulateInstr, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/EmulateInstr", STAMUNIT_OCCURENCES, "Times we've failed interpreting the instruction.");
370 STAM_REG(pVM, &pPool->StatMonitorRZFlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the RC/R0 access handler.");
371 STAM_REG(pVM, &pPool->StatMonitorRZFlushReinit, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/FlushReinit", STAMUNIT_OCCURENCES, "Times we've detected a page table reinit.");
372 STAM_REG(pVM, &pPool->StatMonitorRZFlushModOverflow,STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/FlushOverflow", STAMUNIT_OCCURENCES, "Counting flushes for pages that are modified too often.");
373 STAM_REG(pVM, &pPool->StatMonitorRZFork, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fork", STAMUNIT_OCCURENCES, "Times we've detected fork().");
374 STAM_REG(pVM, &pPool->StatMonitorRZHandled, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/Handled", STAMUNIT_TICKS_PER_CALL, "Profiling the RC/R0 access we've handled (except REP STOSD).");
375 STAM_REG(pVM, &pPool->StatMonitorRZIntrFailPatch1, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/IntrFailPatch1", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction.");
376 STAM_REG(pVM, &pPool->StatMonitorRZIntrFailPatch2, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/IntrFailPatch2", STAMUNIT_OCCURENCES, "Times we've failed interpreting a patch code instruction during flushing.");
377 STAM_REG(pVM, &pPool->StatMonitorRZRepPrefix, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/RepPrefix", STAMUNIT_OCCURENCES, "The number of times we've seen rep prefixes we can't handle.");
378 STAM_REG(pVM, &pPool->StatMonitorRZRepStosd, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/RZ/RepStosd", STAMUNIT_TICKS_PER_CALL, "Profiling the REP STOSD cases we've handled.");
379 STAM_REG(pVM, &pPool->StatMonitorRZFaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults.");
380 STAM_REG(pVM, &pPool->StatMonitorRZFaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults.");
381 STAM_REG(pVM, &pPool->StatMonitorRZFaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr of handled PDPT faults.");
382 STAM_REG(pVM, &pPool->StatMonitorRZFaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/RZ/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults.");
383 STAM_REG(pVM, &pPool->StatMonitorR3, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3", STAMUNIT_TICKS_PER_CALL, "Profiling the R3 access handler.");
384 STAM_REG(pVM, &pPool->StatMonitorR3EmulateInstr, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/EmulateInstr", STAMUNIT_OCCURENCES, "Times we've failed interpreting the instruction.");
385 STAM_REG(pVM, &pPool->StatMonitorR3FlushPage, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/FlushPage", STAMUNIT_TICKS_PER_CALL, "Profiling the pgmPoolFlushPage calls made from the R3 access handler.");
386 STAM_REG(pVM, &pPool->StatMonitorR3FlushReinit, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/FlushReinit", STAMUNIT_OCCURENCES, "Times we've detected a page table reinit.");
387 STAM_REG(pVM, &pPool->StatMonitorR3FlushModOverflow,STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/FlushOverflow", STAMUNIT_OCCURENCES, "Counting flushes for pages that are modified too often.");
388 STAM_REG(pVM, &pPool->StatMonitorR3Fork, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fork", STAMUNIT_OCCURENCES, "Times we've detected fork().");
389 STAM_REG(pVM, &pPool->StatMonitorR3Handled, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/Handled", STAMUNIT_TICKS_PER_CALL, "Profiling the R3 access we've handled (except REP STOSD).");
390 STAM_REG(pVM, &pPool->StatMonitorR3RepPrefix, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/RepPrefix", STAMUNIT_OCCURENCES, "The number of times we've seen rep prefixes we can't handle.");
391 STAM_REG(pVM, &pPool->StatMonitorR3RepStosd, STAMTYPE_PROFILE, "/PGM/Pool/Monitor/R3/RepStosd", STAMUNIT_TICKS_PER_CALL, "Profiling the REP STOSD cases we've handled.");
392 STAM_REG(pVM, &pPool->StatMonitorR3FaultPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PT", STAMUNIT_OCCURENCES, "Nr of handled PT faults.");
393 STAM_REG(pVM, &pPool->StatMonitorR3FaultPD, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PD", STAMUNIT_OCCURENCES, "Nr of handled PD faults.");
394 STAM_REG(pVM, &pPool->StatMonitorR3FaultPDPT, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PDPT", STAMUNIT_OCCURENCES, "Nr of handled PDPT faults.");
395 STAM_REG(pVM, &pPool->StatMonitorR3FaultPML4, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Fault/PML4", STAMUNIT_OCCURENCES, "Nr of handled PML4 faults.");
396 STAM_REG(pVM, &pPool->StatMonitorR3Async, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/R3/Async", STAMUNIT_OCCURENCES, "Times we're called in an async thread and need to flush.");
397 STAM_REG(pVM, &pPool->cModifiedPages, STAMTYPE_U16, "/PGM/Pool/Monitor/cModifiedPages", STAMUNIT_PAGES, "The current cModifiedPages value.");
398 STAM_REG(pVM, &pPool->cModifiedPagesHigh, STAMTYPE_U16_RESET, "/PGM/Pool/Monitor/cModifiedPagesHigh", STAMUNIT_PAGES, "The high watermark for cModifiedPages.");
399 STAM_REG(pVM, &pPool->StatResetDirtyPages, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Resets", STAMUNIT_OCCURENCES, "Times we've called pgmPoolResetDirtyPages (and there were dirty page).");
400 STAM_REG(pVM, &pPool->StatDirtyPage, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/Pages", STAMUNIT_OCCURENCES, "Times we've called pgmPoolAddDirtyPage.");
401 STAM_REG(pVM, &pPool->StatDirtyPageDupFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushDup", STAMUNIT_OCCURENCES, "Times we've had to flush duplicates for dirty page management.");
402 STAM_REG(pVM, &pPool->StatDirtyPageOverFlowFlush, STAMTYPE_COUNTER, "/PGM/Pool/Monitor/Dirty/FlushOverflow",STAMUNIT_OCCURENCES, "Times we've had to flush because of overflow.");
403 STAM_REG(pVM, &pPool->StatCacheHits, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Hits", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls satisfied by the cache.");
404 STAM_REG(pVM, &pPool->StatCacheMisses, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Misses", STAMUNIT_OCCURENCES, "The number of pgmPoolAlloc calls not satisfied by the cache.");
405 STAM_REG(pVM, &pPool->StatCacheKindMismatches, STAMTYPE_COUNTER, "/PGM/Pool/Cache/KindMismatches", STAMUNIT_OCCURENCES, "The number of shadow page kind mismatches. (Better be low, preferably 0!)");
406 STAM_REG(pVM, &pPool->StatCacheFreeUpOne, STAMTYPE_COUNTER, "/PGM/Pool/Cache/FreeUpOne", STAMUNIT_OCCURENCES, "The number of times the cache was asked to free up a page.");
407 STAM_REG(pVM, &pPool->StatCacheCacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Cacheable", STAMUNIT_OCCURENCES, "The number of cacheable allocations.");
408 STAM_REG(pVM, &pPool->StatCacheUncacheable, STAMTYPE_COUNTER, "/PGM/Pool/Cache/Uncacheable", STAMUNIT_OCCURENCES, "The number of uncacheable allocations.");
409#endif /* VBOX_WITH_STATISTICS */
410
411#ifdef VBOX_WITH_DEBUGGER
412 /*
413 * Debugger commands.
414 */
415 static bool s_fRegisteredCmds = false;
416 if (!s_fRegisteredCmds)
417 {
418 rc = DBGCRegisterCommands(&g_aCmds[0], RT_ELEMENTS(g_aCmds));
419 if (RT_SUCCESS(rc))
420 s_fRegisteredCmds = true;
421 }
422#endif
423
424 return VINF_SUCCESS;
425}
426
427
428/**
429 * Relocate the page pool data.
430 *
431 * @param pVM The VM handle.
432 */
433void pgmR3PoolRelocate(PVM pVM)
434{
435 pVM->pgm.s.pPoolRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3);
436 pVM->pgm.s.pPoolR3->pVMRC = pVM->pVMRC;
437 pVM->pgm.s.pPoolR3->paUsersRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paUsersR3);
438 pVM->pgm.s.pPoolR3->paPhysExtsRC = MMHyperR3ToRC(pVM, pVM->pgm.s.pPoolR3->paPhysExtsR3);
439 int rc = PDMR3LdrGetSymbolRC(pVM, NULL, "pgmPoolAccessHandler", &pVM->pgm.s.pPoolR3->pfnAccessHandlerRC);
440 AssertReleaseRC(rc);
441 /* init order hack. */
442 if (!pVM->pgm.s.pPoolR3->pfnAccessHandlerR0)
443 {
444 rc = PDMR3LdrGetSymbolR0(pVM, NULL, "pgmPoolAccessHandler", &pVM->pgm.s.pPoolR3->pfnAccessHandlerR0);
445 AssertReleaseRC(rc);
446 }
447}
448
449
450/**
451 * Grows the shadow page pool.
452 *
453 * I.e. adds more pages to it, assuming it hasn't reached cMaxPages yet.
454 *
455 * @returns VBox status code.
456 * @param pVM The VM handle.
457 */
458VMMR3DECL(int) PGMR3PoolGrow(PVM pVM)
459{
460 PPGMPOOL pPool = pVM->pgm.s.pPoolR3;
461 AssertReturn(pPool->cCurPages < pPool->cMaxPages, VERR_INTERNAL_ERROR);
462
463 pgmLock(pVM);
464
465 /*
466 * How much to grow it by?
467 */
468 uint32_t cPages = pPool->cMaxPages - pPool->cCurPages;
469 cPages = RT_MIN(PGMPOOL_CFG_MAX_GROW, cPages);
470 LogFlow(("PGMR3PoolGrow: Growing the pool by %d (%#x) pages.\n", cPages, cPages));
471
472 for (unsigned i = pPool->cCurPages; cPages-- > 0; i++)
473 {
474 PPGMPOOLPAGE pPage = &pPool->aPages[i];
475
476 /* Allocate all pages in low (below 4 GB) memory as 32-bit guests need a page table root in low memory. */
477 pPage->pvPageR3 = MMR3PageAllocLow(pVM);
478 if (!pPage->pvPageR3)
479 {
480 Log(("We're out of memory!! i=%d\n", i));
481 pgmUnlock(pVM);
482 return i ? VINF_SUCCESS : VERR_NO_PAGE_MEMORY;
483 }
484 pPage->Core.Key = MMPage2Phys(pVM, pPage->pvPageR3);
485 AssertFatal(pPage->Core.Key < _4G);
486 pPage->GCPhys = NIL_RTGCPHYS;
487 pPage->enmKind = PGMPOOLKIND_FREE;
488 pPage->idx = pPage - &pPool->aPages[0];
489 LogFlow(("PGMR3PoolGrow: insert page #%#x - %RHp\n", pPage->idx, pPage->Core.Key));
490 pPage->iNext = pPool->iFreeHead;
491 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
492 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
493 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
494 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
495 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
496 pPage->iAgeNext = NIL_PGMPOOL_IDX;
497 pPage->iAgePrev = NIL_PGMPOOL_IDX;
498 /* commit it */
499 bool fRc = RTAvloHCPhysInsert(&pPool->HCPhysTree, &pPage->Core); Assert(fRc); NOREF(fRc);
500 pPool->iFreeHead = i;
501 pPool->cCurPages = i + 1;
502 }
503
504 pgmUnlock(pVM);
505 Assert(pPool->cCurPages <= pPool->cMaxPages);
506 return VINF_SUCCESS;
507}
508
509
510
511/**
512 * Worker used by pgmR3PoolAccessHandler when it's invoked by an async thread.
513 *
514 * @param pPool The pool.
515 * @param pPage The page.
516 */
517static DECLCALLBACK(void) pgmR3PoolFlushReusedPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
518{
519 /* for the present this should be safe enough I think... */
520 pgmLock(pPool->pVMR3);
521 if ( pPage->fReusedFlushPending
522 && pPage->enmKind != PGMPOOLKIND_FREE)
523 pgmPoolFlushPage(pPool, pPage);
524 pgmUnlock(pPool->pVMR3);
525}
526
527
528/**
529 * \#PF Handler callback for PT write accesses.
530 *
531 * The handler cannot raise any faults; it's mainly for monitoring write accesses
532 * to certain pages.
533 *
534 * @returns VINF_SUCCESS if the handler has carried out the operation.
535 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
536 * @param pVM VM Handle.
537 * @param GCPhys The physical address the guest is writing to.
538 * @param pvPhys The HC mapping of that address.
539 * @param pvBuf What the guest is reading/writing.
540 * @param cbBuf How much it's reading/writing.
541 * @param enmAccessType The access type.
542 * @param pvUser User argument.
543 */
544static DECLCALLBACK(int) pgmR3PoolAccessHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser)
545{
546 STAM_PROFILE_START(&pVM->pgm.s.pPoolR3->StatMonitorR3, a);
547 PPGMPOOL pPool = pVM->pgm.s.pPoolR3;
548 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
549 LogFlow(("pgmR3PoolAccessHandler: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
550 GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
551
552 PVMCPU pVCpu = VMMGetCpu(pVM);
553
554 /*
555 * We don't have to be very sophisticated about this since there are relatively few calls here.
556 * However, we must try our best to detect any non-cpu accesses (disk / networking).
557 *
558 * Just to make life more interesting, we'll have to deal with the async threads too.
559 * We cannot flush a page if we're in an async thread because of REM notifications.
560 */
561 pgmLock(pVM);
562 if (PHYS_PAGE_ADDRESS(GCPhys) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
563 {
564 /* Pool page changed while we were waiting for the lock; ignore. */
565 Log(("CPU%d: pgmR3PoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
566 pgmUnlock(pVM);
567 return VINF_PGM_HANDLER_DO_DEFAULT;
568 }
569
570 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
571
572 /* @todo this code doesn't make any sense. remove the if (!pVCpu) block */
573 if (!pVCpu) /** @todo This shouldn't happen any longer, all access handlers will be called on an EMT. All ring-3 handlers, except MMIO, already own the PGM lock. @bugref{3170} */
574 {
575 Log(("pgmR3PoolAccessHandler: async thread, requesting EMT to flush the page: %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
576 pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));
577 STAM_COUNTER_INC(&pPool->StatMonitorR3Async);
578 if (!pPage->fReusedFlushPending)
579 {
580 pgmUnlock(pVM);
581 int rc = VMR3ReqCallVoidNoWait(pPool->pVMR3, VMCPUID_ANY, (PFNRT)pgmR3PoolFlushReusedPage, 2, pPool, pPage);
582 AssertRCReturn(rc, rc);
583 pgmLock(pVM);
584 pPage->fReusedFlushPending = true;
585 pPage->cModifications += 0x1000;
586 }
587
588 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
589 /** @todo r=bird: making unsafe assumption about not crossing entries here! */
590 while (cbBuf > 4)
591 {
592 cbBuf -= 4;
593 pvPhys = (uint8_t *)pvPhys + 4;
594 GCPhys += 4;
595 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
596 }
597 STAM_PROFILE_STOP(&pPool->StatMonitorR3, a);
598 }
599 else if ( ( pPage->cModifications < 96 /* it's cheaper here. */
600 || pgmPoolIsPageLocked(pPage)
601 )
602 && cbBuf <= 4)
603 {
604 /* Clear the shadow entry. */
605 if (!pPage->cModifications++)
606 pgmPoolMonitorModifiedInsert(pPool, pPage);
607 /** @todo r=bird: making unsafe assumption about not crossing entries here! */
608 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvPhys, 0 /* unknown write size */);
609 STAM_PROFILE_STOP(&pPool->StatMonitorR3, a);
610 }
611 else
612 {
613 pgmPoolMonitorChainFlush(pPool, pPage); /* ASSUME that VERR_PGM_POOL_CLEARED can be ignored here and that FFs will deal with it in due time. */
614 STAM_PROFILE_STOP_EX(&pPool->StatMonitorR3, &pPool->StatMonitorR3FlushPage, a);
615 }
616 pgmUnlock(pVM);
617 return VINF_PGM_HANDLER_DO_DEFAULT;
618}
619
620
621/**
622 * Rendezvous callback used by pgmR3PoolClearAll that clears all shadow pages
623 * and all modification counters.
624 *
625 * This is only called on one of the EMTs while the other ones are waiting for
626 * it to complete this function.
627 *
628 * @returns VINF_SUCCESS (VBox strict status code).
629 * @param pVM The VM handle.
630 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
631 * @param fpvFlushRemTbl When not NULL, we'll flush the REM TLB as well.
632 * (This is the pvUser, so it has to be void *.)
633 *
634 */
635DECLCALLBACK(VBOXSTRICTRC) pgmR3PoolClearAllRendezvous(PVM pVM, PVMCPU pVCpu, void *fpvFlushRemTbl)
636{
637 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
638 STAM_PROFILE_START(&pPool->StatClearAll, c);
639
640 pgmLock(pVM);
641 Log(("pgmR3PoolClearAllRendezvous: cUsedPages=%d fpvFlushRemTbl=%RTbool\n", pPool->cUsedPages, !!fpvFlushRemTbl));
642
643 /*
644 * Iterate all the pages until we've encountered all that are in use.
645 * This is a simple but not quite optimal solution.
646 */
647 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
648 unsigned cLeft = pPool->cUsedPages;
649 uint32_t iPage = pPool->cCurPages;
650 while (--iPage >= PGMPOOL_IDX_FIRST)
651 {
652 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
653 if (pPage->GCPhys != NIL_RTGCPHYS)
654 {
655 switch (pPage->enmKind)
656 {
657 /*
658 * We only care about shadow page tables that reference physical memory
659 */
660#ifdef PGM_WITH_LARGE_PAGES
661 case PGMPOOLKIND_EPT_PD_FOR_PHYS: /* Large pages reference 2 MB of physical memory, so we must clear them. */
662 if (pPage->cPresent)
663 {
664 PX86PDPAE pShwPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage);
665 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
666 {
667 if ( pShwPD->a[i].n.u1Present
668 && pShwPD->a[i].b.u1Size)
669 {
670 Assert(!(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING));
671 pShwPD->a[i].u = 0;
672 Assert(pPage->cPresent);
673 pPage->cPresent--;
674 }
675 }
676 if (pPage->cPresent == 0)
677 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
678 }
679 goto default_case;
680
681 case PGMPOOLKIND_PAE_PD_PHYS: /* Large pages reference 2 MB of physical memory, so we must clear them. */
682 if (pPage->cPresent)
683 {
684 PEPTPD pShwPD = (PEPTPD)PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage);
685 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
686 {
687 Assert((pShwPD->a[i].u & UINT64_C(0xfff0000000000f80)) == 0);
688 if ( pShwPD->a[i].n.u1Present
689 && pShwPD->a[i].b.u1Size)
690 {
691 Assert(!(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING));
692 pShwPD->a[i].u = 0;
693 Assert(pPage->cPresent);
694 pPage->cPresent--;
695 }
696 }
697 if (pPage->cPresent == 0)
698 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
699 }
700 goto default_case;
701#endif /* PGM_WITH_LARGE_PAGES */
702
703 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
704 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
705 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
706 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
707 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
708 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
709 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
710 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
711 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
712 {
713 if (pPage->cPresent)
714 {
715 void *pvShw = PGMPOOL_PAGE_2_PTR_V2(pPool->CTX_SUFF(pVM), pVCpu, pPage);
716 STAM_PROFILE_START(&pPool->StatZeroPage, z);
717#if 0
718 /* Useful check for leaking references; *very* expensive though. */
719 switch (pPage->enmKind)
720 {
721 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
722 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
723 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
724 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
725 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
726 {
727 bool fFoundFirst = false;
728 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)pvShw;
729 for (unsigned ptIndex = 0; ptIndex < RT_ELEMENTS(pPT->a); ptIndex++)
730 {
731 if (pPT->a[ptIndex].u)
732 {
733 if (!fFoundFirst)
734 {
735 AssertFatalMsg(pPage->iFirstPresent <= ptIndex, ("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent));
736 if (pPage->iFirstPresent != ptIndex)
737 Log(("ptIndex = %d first present = %d\n", ptIndex, pPage->iFirstPresent));
738 fFoundFirst = true;
739 }
740 if (PGMSHWPTEPAE_IS_P(pPT->a[ptIndex]))
741 {
742 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pPT->a[ptIndex]), NIL_RTGCPHYS);
743 if (pPage->iFirstPresent == ptIndex)
744 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
745 }
746 }
747 }
748 AssertFatalMsg(pPage->cPresent == 0, ("cPresent = %d pPage = %RGv\n", pPage->cPresent, pPage->GCPhys));
749 break;
750 }
751 default:
752 break;
753 }
754#endif
755 ASMMemZeroPage(pvShw);
756 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
757 pPage->cPresent = 0;
758 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
759 }
760 }
761 /* fall thru */
762
763#ifdef PGM_WITH_LARGE_PAGES
764 default_case:
765#endif
766 default:
767 Assert(!pPage->cModifications || ++cModifiedPages);
768 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
769 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
770 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
771 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
772 pPage->cModifications = 0;
773 break;
774
775 }
776 if (!--cLeft)
777 break;
778 }
779 }
780
781 /* sweep the special pages too. */
782 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
783 {
784 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
785 if (pPage->GCPhys != NIL_RTGCPHYS)
786 {
787 Assert(!pPage->cModifications || ++cModifiedPages);
788 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
789 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
790 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
791 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
792 pPage->cModifications = 0;
793 }
794 }
795
796#ifndef DEBUG_michael
797 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
798#endif
799 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
800 pPool->cModifiedPages = 0;
801
802 /*
803 * Clear all the GCPhys links and rebuild the phys ext free list.
804 */
805 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
806 pRam;
807 pRam = pRam->CTX_SUFF(pNext))
808 {
809 iPage = pRam->cb >> PAGE_SHIFT;
810 while (iPage-- > 0)
811 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
812 }
813
814 pPool->iPhysExtFreeHead = 0;
815 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
816 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
817 for (unsigned i = 0; i < cMaxPhysExts; i++)
818 {
819 paPhysExts[i].iNext = i + 1;
820 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
821 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
822 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
823 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
824 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
825 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
826 }
827 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
828
829
830#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
831 /* Reset all dirty pages to reactivate the page monitoring. */
832 /* Note: we must do this *after* clearing all page references and shadow page tables as there might be stale references to
833 * recently removed MMIO ranges around that might otherwise end up asserting in pgmPoolTracDerefGCPhysHint
834 */
835 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
836 {
837 PPGMPOOLPAGE pPage;
838 unsigned idxPage;
839
840 if (pPool->aDirtyPages[i].uIdx == NIL_PGMPOOL_IDX)
841 continue;
842
843 idxPage = pPool->aDirtyPages[i].uIdx;
844 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
845 pPage = &pPool->aPages[idxPage];
846 Assert(pPage->idx == idxPage);
847 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
848
849 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, i));
850
851 Log(("Reactivate dirty page %RGp\n", pPage->GCPhys));
852
853 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
854 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & PAGE_BASE_GC_MASK);
855 Assert(rc == VINF_SUCCESS);
856 pPage->fDirty = false;
857
858 pPool->aDirtyPages[i].uIdx = NIL_PGMPOOL_IDX;
859 }
860
861 /* Clear all dirty pages. */
862 pPool->idxFreeDirtyPage = 0;
863 pPool->cDirtyPages = 0;
864#endif
865
866 /* Clear the PGM_SYNC_CLEAR_PGM_POOL flag on all VCPUs to prevent redundant flushes. */
867 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
868 pVM->aCpus[idCpu].pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
869
870 /* Flush job finished. */
871 VM_FF_CLEAR(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
872 pPool->cPresent = 0;
873 pgmUnlock(pVM);
874
875 PGM_INVL_ALL_VCPU_TLBS(pVM);
876
877 if (fpvFlushRemTbl)
878 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
879 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
880
881 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
882 return VINF_SUCCESS;
883}
884
885
886/**
887 * Clears the shadow page pool.
888 *
889 * @param pVM The VM handle.
890 * @param fFlushRemTlb When set, the REM TLB is scheduled for flushing as
891 * well.
892 */
893void pgmR3PoolClearAll(PVM pVM, bool fFlushRemTlb)
894{
895 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PoolClearAllRendezvous, &fFlushRemTlb);
896 AssertRC(rc);
897}
898
899/**
900 * Protect all pgm pool page table entries to monitor writes
901 *
902 * @param pVM The VM handle.
903 *
904 * Remark: assumes the caller will flush all TLBs (!!)
905 */
906void pgmR3PoolWriteProtectPages(PVM pVM)
907{
908 PGM_LOCK_ASSERT_OWNER(pVM);
909 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
910 unsigned cLeft = pPool->cUsedPages;
911 unsigned iPage = pPool->cCurPages;
912 while (--iPage >= PGMPOOL_IDX_FIRST)
913 {
914 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
915 if ( pPage->GCPhys != NIL_RTGCPHYS
916 && pPage->cPresent)
917 {
918 union
919 {
920 void *pv;
921 PX86PT pPT;
922 PPGMSHWPTPAE pPTPae;
923 PEPTPT pPTEpt;
924 } uShw;
925 uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
926
927 switch (pPage->enmKind)
928 {
929 /*
930 * We only care about shadow page tables.
931 */
932 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
933 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
934 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
935 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPT->a); iShw++)
936 {
937 if (uShw.pPT->a[iShw].n.u1Present)
938 uShw.pPT->a[iShw].n.u1Write = 0;
939 }
940 break;
941
942 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
943 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
944 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
945 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
946 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
947 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTPae->a); iShw++)
948 {
949 if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
950 PGMSHWPTEPAE_SET_RO(uShw.pPTPae->a[iShw]);
951 }
952 break;
953
954 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
955 for (unsigned iShw = 0; iShw < RT_ELEMENTS(uShw.pPTEpt->a); iShw++)
956 {
957 if (uShw.pPTEpt->a[iShw].n.u1Present)
958 uShw.pPTEpt->a[iShw].n.u1Write = 0;
959 }
960 break;
961
962 default:
963 break;
964 }
965 if (!--cLeft)
966 break;
967 }
968 }
969}
970
971#ifdef VBOX_WITH_DEBUGGER
972/**
973 * The '.pgmpoolcheck' command.
974 *
975 * @returns VBox status.
976 * @param pCmd Pointer to the command descriptor (as registered).
977 * @param pCmdHlp Pointer to command helper functions.
978 * @param pVM Pointer to the current VM (if any).
979 * @param paArgs Pointer to (readonly) array of arguments.
980 * @param cArgs Number of arguments in the array.
981 */
982static DECLCALLBACK(int) pgmR3PoolCmdCheck(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR paArgs, unsigned cArgs)
983{
984 DBGC_CMDHLP_REQ_VM_RET(pCmdHlp, pCmd, pVM);
985 DBGC_CMDHLP_ASSERT_PARSER_RET(pCmdHlp, pCmd, -1, cArgs == 0);
986 uint32_t cErrors = 0;
987
988 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
989 for (unsigned i = 0; i < pPool->cCurPages; i++)
990 {
991 PPGMPOOLPAGE pPage = &pPool->aPages[i];
992 bool fFirstMsg = true;
993
994 /** @todo Cover other paging modes too. */
995 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
996 {
997 PPGMSHWPTPAE pShwPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
998 {
999 PX86PTPAE pGstPT;
1000 PGMPAGEMAPLOCK LockPage;
1001 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, pPage->GCPhys, (const void **)&pGstPT, &LockPage); AssertReleaseRC(rc);
1002
1003 /* Check if any PTEs are out of sync. */
1004 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1005 {
1006 if (PGMSHWPTEPAE_IS_P(pShwPT->a[j]))
1007 {
1008 RTHCPHYS HCPhys = NIL_RTHCPHYS;
1009 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[j].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1010 if ( rc != VINF_SUCCESS
1011 || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[j]) != HCPhys)
1012 {
1013 if (fFirstMsg)
1014 {
1015 DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys);
1016 fFirstMsg = false;
1017 }
1018 DBGCCmdHlpPrintf(pCmdHlp, "Mismatch HCPhys: rc=%Rrc idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys);
1019 cErrors++;
1020 }
1021 else if ( PGMSHWPTEPAE_IS_RW(pShwPT->a[j])
1022 && !pGstPT->a[j].n.u1Write)
1023 {
1024 if (fFirstMsg)
1025 {
1026 DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys);
1027 fFirstMsg = false;
1028 }
1029 DBGCCmdHlpPrintf(pCmdHlp, "Mismatch r/w gst/shw: idx=%d guest %RX64 shw=%RX64 vs %RHp\n", j, pGstPT->a[j].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), HCPhys);
1030 cErrors++;
1031 }
1032 }
1033 }
1034 PGMPhysReleasePageMappingLock(pVM, &LockPage);
1035 }
1036
1037 /* Make sure this page table can't be written to from any shadow mapping. */
1038 RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
1039 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1040 AssertMsgRC(rc, ("PGMPhysGCPhys2HCPhys failed with rc=%d for %RGp\n", rc, pPage->GCPhys));
1041 if (rc == VINF_SUCCESS)
1042 {
1043 for (unsigned j = 0; j < pPool->cCurPages; j++)
1044 {
1045 PPGMPOOLPAGE pTempPage = &pPool->aPages[j];
1046
1047 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1048 {
1049 PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1050
1051 for (unsigned k = 0; k < RT_ELEMENTS(pShwPT->a); k++)
1052 {
1053 if ( PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[k])
1054# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1055 && !pPage->fDirty
1056# endif
1057 && PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[k]) == HCPhysPT)
1058 {
1059 if (fFirstMsg)
1060 {
1061 DBGCCmdHlpPrintf(pCmdHlp, "Check pool page %RGp\n", pPage->GCPhys);
1062 fFirstMsg = false;
1063 }
1064 DBGCCmdHlpPrintf(pCmdHlp, "Mismatch: r/w: GCPhys=%RGp idx=%d shw %RX64 %RX64\n", pTempPage->GCPhys, k, PGMSHWPTEPAE_GET_LOG(pShwPT->a[k]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[k]));
1065 cErrors++;
1066 }
1067 }
1068 }
1069 }
1070 }
1071 }
1072 }
1073 if (cErrors > 0)
1074 return DBGCCmdHlpFail(pCmdHlp, pCmd, "Found %#x errors", cErrors);
1075 return VINF_SUCCESS;
1076}
1077#endif /* VBOX_WITH_DEBUGGER */