VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp@37354

Last change on this file since 37354 was 37354, checked in by vboxsync, 14 years ago

PGM: Fixed locking issues in PGMR3PhysMMIORegister and PGMR3PhysMMIODeregister. Also addressed a harmless one in PGMR3PhysRomRegister (only used at init time, so no races). Fortified the code with more lock assertions, replacing the incorrect PGMIsLocked() checks (we only care whether the current thread is the lock owner). Cleaned up some ReturnStmt macros and added more of them.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 161.6 KB
1/* $Id: PGMPhys.cpp 37354 2011-06-07 15:05:32Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Physical Memory Addressing.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_PHYS
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/iom.h>
25#include <VBox/vmm/mm.h>
26#include <VBox/vmm/stam.h>
27#include <VBox/vmm/rem.h>
28#include <VBox/vmm/pdmdev.h>
29#include "PGMInternal.h"
30#include <VBox/vmm/vm.h>
31#include "PGMInline.h"
32#include <VBox/sup.h>
33#include <VBox/param.h>
34#include <VBox/err.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/alloc.h>
38#include <iprt/asm.h>
39#include <iprt/thread.h>
40#include <iprt/string.h>
41#include <iprt/system.h>
42
43
44/*******************************************************************************
45* Defined Constants And Macros *
46*******************************************************************************/
47/** The number of pages to free in one batch. */
48#define PGMPHYS_FREE_PAGE_BATCH_SIZE 128
49
50
51/*******************************************************************************
52* Internal Functions *
53*******************************************************************************/
54static DECLCALLBACK(int) pgmR3PhysRomWriteHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser);
55
56
57/*
58 * PGMR3PhysReadU8-64
59 * PGMR3PhysWriteU8-64
60 */
61#define PGMPHYSFN_READNAME PGMR3PhysReadU8
62#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU8
63#define PGMPHYS_DATASIZE 1
64#define PGMPHYS_DATATYPE uint8_t
65#include "PGMPhysRWTmpl.h"
66
67#define PGMPHYSFN_READNAME PGMR3PhysReadU16
68#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU16
69#define PGMPHYS_DATASIZE 2
70#define PGMPHYS_DATATYPE uint16_t
71#include "PGMPhysRWTmpl.h"
72
73#define PGMPHYSFN_READNAME PGMR3PhysReadU32
74#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU32
75#define PGMPHYS_DATASIZE 4
76#define PGMPHYS_DATATYPE uint32_t
77#include "PGMPhysRWTmpl.h"
78
79#define PGMPHYSFN_READNAME PGMR3PhysReadU64
80#define PGMPHYSFN_WRITENAME PGMR3PhysWriteU64
81#define PGMPHYS_DATASIZE 8
82#define PGMPHYS_DATATYPE uint64_t
83#include "PGMPhysRWTmpl.h"
84
85
86/**
87 * EMT worker for PGMR3PhysReadExternal.
88 */
89static DECLCALLBACK(int) pgmR3PhysReadExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, void *pvBuf, size_t cbRead)
90{
91 PGMPhysRead(pVM, *pGCPhys, pvBuf, cbRead);
92 return VINF_SUCCESS;
93}
94
95
96/**
97 * Read from physical memory, external users.
98 *
99 * @returns VBox status code.
100 * @retval VINF_SUCCESS.
101 *
102 * @param pVM VM Handle.
103 * @param GCPhys Physical address to read from.
104 * @param pvBuf Where to read into.
105 * @param cbRead How many bytes to read.
106 *
107 * @thread Any but EMTs.
108 */
109VMMR3DECL(int) PGMR3PhysReadExternal(PVM pVM, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead)
110{
111 VM_ASSERT_OTHER_THREAD(pVM);
112
113 AssertMsgReturn(cbRead > 0, ("don't even think about reading zero bytes!\n"), VINF_SUCCESS);
114 LogFlow(("PGMR3PhysReadExternal: %RGp %d\n", GCPhys, cbRead));
115
116 pgmLock(pVM);
117
118 /*
119 * Copy loop on ram ranges.
120 */
121 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
122 for (;;)
123 {
124 /* Inside range or not? */
125 if (pRam && GCPhys >= pRam->GCPhys)
126 {
127 /*
128 * Must work our way thru this page by page.
129 */
130 RTGCPHYS off = GCPhys - pRam->GCPhys;
131 while (off < pRam->cb)
132 {
133 unsigned iPage = off >> PAGE_SHIFT;
134 PPGMPAGE pPage = &pRam->aPages[iPage];
135
136 /*
137 * If the page has an ALL access handler, we'll have to
138 * delegate the job to EMT.
139 */
140 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
141 {
142 pgmUnlock(pVM);
143
144 return VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysReadExternalEMT, 4,
145 pVM, &GCPhys, pvBuf, cbRead);
146 }
147 Assert(!PGM_PAGE_IS_MMIO(pPage));
148
149 /*
150 * Simple stuff, go ahead.
151 */
152 size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK);
153 if (cb > cbRead)
154 cb = cbRead;
155 const void *pvSrc;
156 int rc = pgmPhysGCPhys2CCPtrInternalReadOnly(pVM, pPage, pRam->GCPhys + off, &pvSrc);
157 if (RT_SUCCESS(rc))
158 memcpy(pvBuf, pvSrc, cb);
159 else
160 {
161 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternalReadOnly failed on %RGp / %R[pgmpage] -> %Rrc\n",
162 pRam->GCPhys + off, pPage, rc));
163 memset(pvBuf, 0xff, cb);
164 }
165
166 /* next page */
167 if (cb >= cbRead)
168 {
169 pgmUnlock(pVM);
170 return VINF_SUCCESS;
171 }
172 cbRead -= cb;
173 off += cb;
174 GCPhys += cb;
175 pvBuf = (char *)pvBuf + cb;
176 } /* walk pages in ram range. */
177 }
178 else
179 {
180 LogFlow(("PGMPhysRead: Unassigned %RGp size=%u\n", GCPhys, cbRead));
181
182 /*
183 * Unassigned address space.
184 */
185 size_t cb = pRam ? pRam->GCPhys - GCPhys : ~(size_t)0;
186 if (cb >= cbRead)
187 {
188 memset(pvBuf, 0xff, cbRead);
189 break;
190 }
191 memset(pvBuf, 0xff, cb);
192
193 cbRead -= cb;
194 pvBuf = (char *)pvBuf + cb;
195 GCPhys += cb;
196 }
197
198 /* Advance range if necessary. */
199 while (pRam && GCPhys > pRam->GCPhysLast)
200 pRam = pRam->CTX_SUFF(pNext);
201 } /* Ram range walk */
202
203 pgmUnlock(pVM);
204
205 return VINF_SUCCESS;
206}
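/*
 * Usage sketch (illustrative only; the helper name and the 64 byte buffer are
 * made up): reading guest memory from a non-EMT thread. Pages covered by an
 * ALL access handler are transparently read via an EMT request, as shown above.
 */
static int pgmPhysExamplePeekGuestMem(PVM pVM, RTGCPHYS GCPhys)
{
    uint8_t abBuf[64];
    int rc = PGMR3PhysReadExternal(pVM, GCPhys, abBuf, sizeof(abBuf));
    if (RT_SUCCESS(rc))
        Log(("example: %RGp starts with %#x\n", GCPhys, abBuf[0]));
    return rc;
}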
207
208
209/**
210 * EMT worker for PGMR3PhysWriteExternal.
211 */
212static DECLCALLBACK(int) pgmR3PhysWriteExternalEMT(PVM pVM, PRTGCPHYS pGCPhys, const void *pvBuf, size_t cbWrite)
213{
214 /** @todo VERR_EM_NO_MEMORY */
215 PGMPhysWrite(pVM, *pGCPhys, pvBuf, cbWrite);
216 return VINF_SUCCESS;
217}
218
219
220/**
221 * Write to physical memory, external users.
222 *
223 * @returns VBox status code.
224 * @retval VINF_SUCCESS.
225 * @retval VERR_EM_NO_MEMORY.
226 *
227 * @param pVM VM Handle.
228 * @param GCPhys Physical address to write to.
229 * @param pvBuf What to write.
230 * @param cbWrite How many bytes to write.
231 * @param pszWho Who is writing. For tracking down who is writing
232 * after we've saved the state.
233 *
234 * @thread Any but EMTs.
235 */
236VMMDECL(int) PGMR3PhysWriteExternal(PVM pVM, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite, const char *pszWho)
237{
238 VM_ASSERT_OTHER_THREAD(pVM);
239
240 AssertMsg(!pVM->pgm.s.fNoMorePhysWrites,
241 ("Calling PGMR3PhysWriteExternal after pgmR3Save()! GCPhys=%RGp cbWrite=%#x pszWho=%s\n",
242 GCPhys, cbWrite, pszWho));
243 AssertMsgReturn(cbWrite > 0, ("don't even think about writing zero bytes!\n"), VINF_SUCCESS);
244 LogFlow(("PGMR3PhysWriteExternal: %RGp %d\n", GCPhys, cbWrite));
245
246 pgmLock(pVM);
247
248 /*
249 * Copy loop on ram ranges, stop when we hit something difficult.
250 */
251 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
252 for (;;)
253 {
254 /* Inside range or not? */
255 if (pRam && GCPhys >= pRam->GCPhys)
256 {
257 /*
258 * Must work our way thru this page by page.
259 */
260 RTGCPTR off = GCPhys - pRam->GCPhys;
261 while (off < pRam->cb)
262 {
263 RTGCPTR iPage = off >> PAGE_SHIFT;
264 PPGMPAGE pPage = &pRam->aPages[iPage];
265
266 /*
267 * If the page is problematic, we have to do the work on the EMT.
268 *
269 * Allocating writable pages and access handlers are
270 * problematic, write monitored pages are simple and can be
271 * dealt with here.
272 */
273 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
274 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
275 {
276 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
277 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
278 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage);
279 else
280 {
281 pgmUnlock(pVM);
282
283 return VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysWriteExternalEMT, 4,
284 pVM, &GCPhys, pvBuf, cbWrite);
285 }
286 }
287 Assert(!PGM_PAGE_IS_MMIO(pPage));
288
289 /*
290 * Simple stuff, go ahead.
291 */
292 size_t cb = PAGE_SIZE - (off & PAGE_OFFSET_MASK);
293 if (cb > cbWrite)
294 cb = cbWrite;
295 void *pvDst;
296 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, pRam->GCPhys + off, &pvDst);
297 if (RT_SUCCESS(rc))
298 memcpy(pvDst, pvBuf, cb);
299 else
300 AssertLogRelMsgFailed(("pgmPhysGCPhys2CCPtrInternal failed on %RGp / %R[pgmpage] -> %Rrc\n",
301 pRam->GCPhys + off, pPage, rc));
302
303 /* next page */
304 if (cb >= cbWrite)
305 {
306 pgmUnlock(pVM);
307 return VINF_SUCCESS;
308 }
309
310 cbWrite -= cb;
311 off += cb;
312 GCPhys += cb;
313 pvBuf = (const char *)pvBuf + cb;
314 } /* walk pages in ram range */
315 }
316 else
317 {
318 /*
319 * Unassigned address space, skip it.
320 */
321 if (!pRam)
322 break;
323 size_t cb = pRam->GCPhys - GCPhys;
324 if (cb >= cbWrite)
325 break;
326 cbWrite -= cb;
327 pvBuf = (const char *)pvBuf + cb;
328 GCPhys += cb;
329 }
330
331 /* Advance range if necessary. */
332 while (pRam && GCPhys > pRam->GCPhysLast)
333 pRam = pRam->CTX_SUFF(pNext);
334 } /* Ram range walk */
335
336 pgmUnlock(pVM);
337 return VINF_SUCCESS;
338}
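/*
 * Usage sketch (illustrative only; the helper name and the "example" tag are
 * made up): writing guest memory from a non-EMT thread. The pszWho string is
 * only used for tracking down writers after the state has been saved.
 */
static int pgmPhysExamplePokeGuestMem(PVM pVM, RTGCPHYS GCPhys, uint32_t u32Value)
{
    return PGMR3PhysWriteExternal(pVM, GCPhys, &u32Value, sizeof(u32Value), "example");
}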
339
340
341/**
342 * VMR3ReqCall worker for PGMR3PhysGCPhys2CCPtrExternal to make pages writable.
343 *
344 * @returns see PGMR3PhysGCPhys2CCPtrExternal
345 * @param pVM The VM handle.
346 * @param pGCPhys Pointer to the guest physical address.
347 * @param ppv Where to store the mapping address.
348 * @param pLock Where to store the lock.
349 */
350static DECLCALLBACK(int) pgmR3PhysGCPhys2CCPtrDelegated(PVM pVM, PRTGCPHYS pGCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
351{
352 /*
353 * Just hand it to PGMPhysGCPhys2CCPtr and check that it's not a page with
354 * an access handler after it succeeds.
355 */
356 int rc = pgmLock(pVM);
357 AssertRCReturn(rc, rc);
358
359 rc = PGMPhysGCPhys2CCPtr(pVM, *pGCPhys, ppv, pLock);
360 if (RT_SUCCESS(rc))
361 {
362 PPGMPAGEMAPTLBE pTlbe;
363 int rc2 = pgmPhysPageQueryTlbe(pVM, *pGCPhys, &pTlbe);
364 AssertFatalRC(rc2);
365 PPGMPAGE pPage = pTlbe->pPage;
366 if (PGM_PAGE_IS_MMIO(pPage))
367 {
368 PGMPhysReleasePageMappingLock(pVM, pLock);
369 rc = VERR_PGM_PHYS_PAGE_RESERVED;
370 }
371 else if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
372#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
373 || pgmPoolIsDirtyPage(pVM, *pGCPhys)
374#endif
375 )
376 {
377 /* We *must* flush any corresponding pgm pool page here, otherwise we'll
378 * not be informed about writes and keep bogus gst->shw mappings around.
379 */
380 pgmPoolFlushPageByGCPhys(pVM, *pGCPhys);
381 Assert(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage));
382 /** @todo r=bird: return VERR_PGM_PHYS_PAGE_RESERVED here if it still has
383 * active handlers, see the PGMR3PhysGCPhys2CCPtrExternal docs. */
384 }
385 }
386
387 pgmUnlock(pVM);
388 return rc;
389}
390
391
392/**
393 * Requests the mapping of a guest page into ring-3, external threads.
394 *
395 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
396 * release it.
397 *
398 * This API will assume your intention is to write to the page, and will
399 * therefore replace shared and zero pages. If you do not intend to modify the
400 * page, use the PGMR3PhysGCPhys2CCPtrReadOnlyExternal() API.
401 *
402 * @returns VBox status code.
403 * @retval VINF_SUCCESS on success.
404 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
405 * backing or if the page has any active access handlers. The caller
406 * must fall back on using PGMR3PhysWriteExternal.
407 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
408 *
409 * @param pVM The VM handle.
410 * @param GCPhys The guest physical address of the page that should be mapped.
411 * @param ppv Where to store the address corresponding to GCPhys.
412 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
413 *
414 * @remark Avoid calling this API from within critical sections (other than the
415 * PGM one) because of the deadlock risk when we have to delegate the
416 * task to an EMT.
417 * @thread Any.
418 */
419VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrExternal(PVM pVM, RTGCPHYS GCPhys, void **ppv, PPGMPAGEMAPLOCK pLock)
420{
421 AssertPtr(ppv);
422 AssertPtr(pLock);
423
424 Assert(VM_IS_EMT(pVM) || !PGMIsLockOwner(pVM));
425
426 int rc = pgmLock(pVM);
427 AssertRCReturn(rc, rc);
428
429 /*
430 * Query the Physical TLB entry for the page (may fail).
431 */
432 PPGMPAGEMAPTLBE pTlbe;
433 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
434 if (RT_SUCCESS(rc))
435 {
436 PPGMPAGE pPage = pTlbe->pPage;
437 if (PGM_PAGE_IS_MMIO(pPage))
438 rc = VERR_PGM_PHYS_PAGE_RESERVED;
439 else
440 {
441 /*
442 * If the page is shared, the zero page, or being write monitored
443 * it must be converted to a page that's writable if possible.
444 * We can only deal with write monitored pages here, the rest have
445 * to be on an EMT.
446 */
447 if ( PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
448 || PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
449#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
450 || pgmPoolIsDirtyPage(pVM, GCPhys)
451#endif
452 )
453 {
454 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
455 && !PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage)
456#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
457 && !pgmPoolIsDirtyPage(pVM, GCPhys)
458#endif
459 )
460 pgmPhysPageMakeWriteMonitoredWritable(pVM, pPage);
461 else
462 {
463 pgmUnlock(pVM);
464
465 return VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)pgmR3PhysGCPhys2CCPtrDelegated, 4,
466 pVM, &GCPhys, ppv, pLock);
467 }
468 }
469
470 /*
471 * Now, just perform the locking and calculate the return address.
472 */
473 PPGMPAGEMAP pMap = pTlbe->pMap;
474 if (pMap)
475 pMap->cRefs++;
476
477 unsigned cLocks = PGM_PAGE_GET_WRITE_LOCKS(pPage);
478 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
479 {
480 if (cLocks == 0)
481 pVM->pgm.s.cWriteLockedPages++;
482 PGM_PAGE_INC_WRITE_LOCKS(pPage);
483 }
484 else if (cLocks != PGM_PAGE_GET_WRITE_LOCKS(pPage))
485 {
486 PGM_PAGE_INC_WRITE_LOCKS(pPage);
487 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent write locked state!\n", GCPhys, pPage));
488 if (pMap)
489 pMap->cRefs++; /* Extra ref to prevent it from going away. */
490 }
491
492 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
493 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_WRITE;
494 pLock->pvMap = pMap;
495 }
496 }
497
498 pgmUnlock(pVM);
499 return rc;
500}
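/*
 * Usage sketch (illustrative only; the helper name and the filler value are
 * made up): mapping a guest page for writing from an external thread. A real
 * caller must handle VERR_PGM_PHYS_PAGE_RESERVED by falling back on
 * PGMR3PhysWriteExternal, as the docs above require.
 */
static int pgmPhysExampleFillToPageEnd(PVM pVM, RTGCPHYS GCPhys, uint8_t bFiller)
{
    void           *pv;
    PGMPAGEMAPLOCK  Lock;
    int rc = PGMR3PhysGCPhys2CCPtrExternal(pVM, GCPhys, &pv, &Lock);
    if (RT_SUCCESS(rc))
    {
        /* pv corresponds to GCPhys (page base + offset); fill up to the page end. */
        memset(pv, bFiller, PAGE_SIZE - (GCPhys & PAGE_OFFSET_MASK));
        PGMPhysReleasePageMappingLock(pVM, &Lock);
    }
    return rc;
}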
501
502
503/**
504 * Requests the mapping of a guest page into ring-3, external threads.
505 *
506 * When you're done with the page, call PGMPhysReleasePageMappingLock() ASAP to
507 * release it.
508 *
509 * @returns VBox status code.
510 * @retval VINF_SUCCESS on success.
511 * @retval VERR_PGM_PHYS_PAGE_RESERVED if it's a valid page but has no physical
512 * backing or if the page has an active ALL access handler. The caller
513 * must fall back on using PGMPhysRead.
514 * @retval VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS if it's not a valid physical address.
515 *
516 * @param pVM The VM handle.
517 * @param GCPhys The guest physical address of the page that should be mapped.
518 * @param ppv Where to store the address corresponding to GCPhys.
519 * @param pLock Where to store the lock information that PGMPhysReleasePageMappingLock needs.
520 *
521 * @remark Avoid calling this API from within critical sections (other than
522 * the PGM one) because of the deadlock risk.
523 * @thread Any.
524 */
525VMMR3DECL(int) PGMR3PhysGCPhys2CCPtrReadOnlyExternal(PVM pVM, RTGCPHYS GCPhys, void const **ppv, PPGMPAGEMAPLOCK pLock)
526{
527 int rc = pgmLock(pVM);
528 AssertRCReturn(rc, rc);
529
530 /*
531 * Query the Physical TLB entry for the page (may fail).
532 */
533 PPGMPAGEMAPTLBE pTlbe;
534 rc = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
535 if (RT_SUCCESS(rc))
536 {
537 PPGMPAGE pPage = pTlbe->pPage;
538#if 1
539 /* MMIO pages don't have any readable backing. */
540 if (PGM_PAGE_IS_MMIO(pPage))
541 rc = VERR_PGM_PHYS_PAGE_RESERVED;
542#else
543 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
544 rc = VERR_PGM_PHYS_PAGE_RESERVED;
545#endif
546 else
547 {
548 /*
549 * Now, just perform the locking and calculate the return address.
550 */
551 PPGMPAGEMAP pMap = pTlbe->pMap;
552 if (pMap)
553 pMap->cRefs++;
554
555 unsigned cLocks = PGM_PAGE_GET_READ_LOCKS(pPage);
556 if (RT_LIKELY(cLocks < PGM_PAGE_MAX_LOCKS - 1))
557 {
558 if (cLocks == 0)
559 pVM->pgm.s.cReadLockedPages++;
560 PGM_PAGE_INC_READ_LOCKS(pPage);
561 }
562 else if (cLocks != PGM_PAGE_GET_READ_LOCKS(pPage))
563 {
564 PGM_PAGE_INC_READ_LOCKS(pPage);
565 AssertMsgFailed(("%RGp / %R[pgmpage] is entering permanent readonly locked state!\n", GCPhys, pPage));
566 if (pMap)
567 pMap->cRefs++; /* Extra ref to prevent it from going away. */
568 }
569
570 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
571 pLock->uPageAndType = (uintptr_t)pPage | PGMPAGEMAPLOCK_TYPE_READ;
572 pLock->pvMap = pMap;
573 }
574 }
575
576 pgmUnlock(pVM);
577 return rc;
578}
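/*
 * Usage sketch (illustrative only; the helper name and the byte-sum "checksum"
 * are made up): read-only mapping of a whole guest page from an external
 * thread. MMIO pages fail with VERR_PGM_PHYS_PAGE_RESERVED as noted above.
 */
static int pgmPhysExampleSumPage(PVM pVM, RTGCPHYS GCPhysPage, uint32_t *puSum)
{
    void const     *pv;
    PGMPAGEMAPLOCK  Lock;
    int rc = PGMR3PhysGCPhys2CCPtrReadOnlyExternal(pVM, GCPhysPage & ~(RTGCPHYS)PAGE_OFFSET_MASK, &pv, &Lock);
    if (RT_SUCCESS(rc))
    {
        uint32_t uSum = 0;
        for (uint32_t off = 0; off < PAGE_SIZE; off++)
            uSum += ((uint8_t const *)pv)[off];
        *puSum = uSum;
        PGMPhysReleasePageMappingLock(pVM, &Lock);
    }
    return rc;
}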
579
580
581#define MAKE_LEAF(a_pNode) \
582 do { \
583 (a_pNode)->pLeftR3 = NIL_RTR3PTR; \
584 (a_pNode)->pRightR3 = NIL_RTR3PTR; \
585 (a_pNode)->pLeftR0 = NIL_RTR0PTR; \
586 (a_pNode)->pRightR0 = NIL_RTR0PTR; \
587 (a_pNode)->pLeftRC = NIL_RTRCPTR; \
588 (a_pNode)->pRightRC = NIL_RTRCPTR; \
589 } while (0)
590
591#define INSERT_LEFT(a_pParent, a_pNode) \
592 do { \
593 (a_pParent)->pLeftR3 = (a_pNode); \
594 (a_pParent)->pLeftR0 = (a_pNode)->pSelfR0; \
595 (a_pParent)->pLeftRC = (a_pNode)->pSelfRC; \
596 } while (0)
597#define INSERT_RIGHT(a_pParent, a_pNode) \
598 do { \
599 (a_pParent)->pRightR3 = (a_pNode); \
600 (a_pParent)->pRightR0 = (a_pNode)->pSelfR0; \
601 (a_pParent)->pRightRC = (a_pNode)->pSelfRC; \
602 } while (0)
603
604
605/**
606 * Recursive tree builder.
607 *
608 * @param ppRam Pointer to the iterator variable.
609 * @param iDepth The height above normal leaf nodes. Inserts a leaf
610 * node if 0.
611 */
612static PPGMRAMRANGE pgmR3PhysRebuildRamRangeSearchTreesRecursively(PPGMRAMRANGE *ppRam, int iDepth)
613{
614 PPGMRAMRANGE pRam;
615 if (iDepth <= 0)
616 {
617 /*
618 * Leaf node.
619 */
620 pRam = *ppRam;
621 if (pRam)
622 {
623 *ppRam = pRam->pNextR3;
624 MAKE_LEAF(pRam);
625 }
626 }
627 else
628 {
629
630 /*
631 * Intermediate node.
632 */
633 PPGMRAMRANGE pLeft = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1);
634
635 pRam = *ppRam;
636 if (!pRam)
637 return pLeft;
638 *ppRam = pRam->pNextR3;
639 MAKE_LEAF(pRam);
640 INSERT_LEFT(pRam, pLeft);
641
642 PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(ppRam, iDepth - 1);
643 if (pRight)
644 INSERT_RIGHT(pRam, pRight);
645 }
646 return pRam;
647}
648
649
650/**
651 * Rebuilds the RAM range search trees.
652 *
653 * @param pVM The VM handle.
654 */
655static void pgmR3PhysRebuildRamRangeSearchTrees(PVM pVM)
656{
657
658 /*
659 * Create the reasonably balanced tree in a sequential fashion.
660 * For simplicity (laziness) we use standard recursion here.
661 */
662 int iDepth = 0;
663 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
664 PPGMRAMRANGE pRoot = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, 0);
665 while (pRam)
666 {
667 PPGMRAMRANGE pLeft = pRoot;
668
669 pRoot = pRam;
670 pRam = pRam->pNextR3;
671 MAKE_LEAF(pRoot);
672 INSERT_LEFT(pRoot, pLeft);
673
674 PPGMRAMRANGE pRight = pgmR3PhysRebuildRamRangeSearchTreesRecursively(&pRam, iDepth);
675 if (pRight)
676 INSERT_RIGHT(pRoot, pRight);
677 /** @todo else: rotate the tree. */
678
679 iDepth++;
680 }
681
682 pVM->pgm.s.pRamRangeTreeR3 = pRoot;
683 pVM->pgm.s.pRamRangeTreeR0 = pRoot ? pRoot->pSelfR0 : NIL_RTR0PTR;
684 pVM->pgm.s.pRamRangeTreeRC = pRoot ? pRoot->pSelfRC : NIL_RTRCPTR;
685
686#ifdef VBOX_STRICT
687 /*
688 * Verify that the above code works.
689 */
690 unsigned cRanges = 0;
691 for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
692 cRanges++;
693 Assert(cRanges > 0);
694
695 unsigned cMaxDepth = ASMBitLastSetU32(cRanges);
696 if ((1U << cMaxDepth) < cRanges)
697 cMaxDepth++;
698
699 for (pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
700 {
701 unsigned cDepth = 0;
702 PPGMRAMRANGE pRam2 = pVM->pgm.s.pRamRangeTreeR3;
703 for (;;)
704 {
705 if (pRam == pRam2)
706 break;
707 Assert(pRam2);
708 if (pRam->GCPhys < pRam2->GCPhys)
709 pRam2 = pRam2->pLeftR3;
710 else
711 pRam2 = pRam2->pRightR3;
712 }
713 AssertMsg(cDepth <= cMaxDepth, ("cDepth=%d cMaxDepth=%d\n", cDepth, cMaxDepth));
714 }
715#endif /* VBOX_STRICT */
716}
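/*
 * Lookup sketch (illustrative only; the real lookup helpers live elsewhere in
 * PGM): how the search tree built above is traversed. This mirrors the
 * left/right decisions in the VBOX_STRICT verification loop.
 */
static PPGMRAMRANGE pgmPhysExampleTreeLookupR3(PVM pVM, RTGCPHYS GCPhys)
{
    PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangeTreeR3;
    while (pRam)
    {
        if (GCPhys - pRam->GCPhys < pRam->cb)
            return pRam;                /* GCPhys lies inside this range. */
        if (GCPhys < pRam->GCPhys)
            pRam = pRam->pLeftR3;
        else
            pRam = pRam->pRightR3;
    }
    return NULL;                        /* Unassigned address. */
}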
717
718#undef MAKE_LEAF
719#undef INSERT_LEFT
720#undef INSERT_RIGHT
721
722/**
723 * Relinks the RAM ranges using the pSelfRC and pSelfR0 pointers.
724 *
725 * Called when anything was relocated.
726 *
727 * @param pVM Pointer to the shared VM structure.
728 */
729void pgmR3PhysRelinkRamRanges(PVM pVM)
730{
731 PPGMRAMRANGE pCur;
732
733#ifdef VBOX_STRICT
734 for (pCur = pVM->pgm.s.pRamRangesXR3; pCur; pCur = pCur->pNextR3)
735 {
736 Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfR0 == MMHyperCCToR0(pVM, pCur));
737 Assert((pCur->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pCur->pSelfRC == MMHyperCCToRC(pVM, pCur));
738 Assert((pCur->GCPhys & PAGE_OFFSET_MASK) == 0);
739 Assert((pCur->GCPhysLast & PAGE_OFFSET_MASK) == PAGE_OFFSET_MASK);
740 Assert((pCur->cb & PAGE_OFFSET_MASK) == 0);
741 Assert(pCur->cb == pCur->GCPhysLast - pCur->GCPhys + 1);
742 for (PPGMRAMRANGE pCur2 = pVM->pgm.s.pRamRangesXR3; pCur2; pCur2 = pCur2->pNextR3)
743 Assert( pCur2 == pCur
744 || strcmp(pCur2->pszDesc, pCur->pszDesc)); /** @todo fix MMIO ranges!! */
745 }
746#endif
747
748 pCur = pVM->pgm.s.pRamRangesXR3;
749 if (pCur)
750 {
751 pVM->pgm.s.pRamRangesXR0 = pCur->pSelfR0;
752 pVM->pgm.s.pRamRangesXRC = pCur->pSelfRC;
753
754 for (; pCur->pNextR3; pCur = pCur->pNextR3)
755 {
756 pCur->pNextR0 = pCur->pNextR3->pSelfR0;
757 pCur->pNextRC = pCur->pNextR3->pSelfRC;
758 }
759
760 Assert(pCur->pNextR0 == NIL_RTR0PTR);
761 Assert(pCur->pNextRC == NIL_RTRCPTR);
762 }
763 else
764 {
765 Assert(pVM->pgm.s.pRamRangesXR0 == NIL_RTR0PTR);
766 Assert(pVM->pgm.s.pRamRangesXRC == NIL_RTRCPTR);
767 }
768 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
769
770 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
771}
772
773
774/**
775 * Links a new RAM range into the list.
776 *
777 * @param pVM Pointer to the shared VM structure.
778 * @param pNew Pointer to the new list entry.
779 * @param pPrev Pointer to the previous list entry. If NULL, insert as head.
780 */
781static void pgmR3PhysLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, PPGMRAMRANGE pPrev)
782{
783 AssertMsg(pNew->pszDesc, ("%RGp-%RGp\n", pNew->GCPhys, pNew->GCPhysLast));
784 Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfR0 == MMHyperCCToR0(pVM, pNew));
785 Assert((pNew->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pNew->pSelfRC == MMHyperCCToRC(pVM, pNew));
786
787 pgmLock(pVM);
788
789 PPGMRAMRANGE pRam = pPrev ? pPrev->pNextR3 : pVM->pgm.s.pRamRangesXR3;
790 pNew->pNextR3 = pRam;
791 pNew->pNextR0 = pRam ? pRam->pSelfR0 : NIL_RTR0PTR;
792 pNew->pNextRC = pRam ? pRam->pSelfRC : NIL_RTRCPTR;
793
794 if (pPrev)
795 {
796 pPrev->pNextR3 = pNew;
797 pPrev->pNextR0 = pNew->pSelfR0;
798 pPrev->pNextRC = pNew->pSelfRC;
799 }
800 else
801 {
802 pVM->pgm.s.pRamRangesXR3 = pNew;
803 pVM->pgm.s.pRamRangesXR0 = pNew->pSelfR0;
804 pVM->pgm.s.pRamRangesXRC = pNew->pSelfRC;
805 }
806 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
807
808 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
809 pgmUnlock(pVM);
810}
811
812
813/**
814 * Unlink an existing RAM range from the list.
815 *
816 * @param pVM Pointer to the shared VM structure.
817 * @param pRam Pointer to the RAM range to unlink.
818 * @param pPrev Pointer to the previous list entry. NULL if pRam is the list head.
819 */
820static void pgmR3PhysUnlinkRamRange2(PVM pVM, PPGMRAMRANGE pRam, PPGMRAMRANGE pPrev)
821{
822 Assert(pPrev ? pPrev->pNextR3 == pRam : pVM->pgm.s.pRamRangesXR3 == pRam);
823 Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfR0 == MMHyperCCToR0(pVM, pRam));
824 Assert((pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING) || pRam->pSelfRC == MMHyperCCToRC(pVM, pRam));
825
826 pgmLock(pVM);
827
828 PPGMRAMRANGE pNext = pRam->pNextR3;
829 if (pPrev)
830 {
831 pPrev->pNextR3 = pNext;
832 pPrev->pNextR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR;
833 pPrev->pNextRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR;
834 }
835 else
836 {
837 Assert(pVM->pgm.s.pRamRangesXR3 == pRam);
838 pVM->pgm.s.pRamRangesXR3 = pNext;
839 pVM->pgm.s.pRamRangesXR0 = pNext ? pNext->pSelfR0 : NIL_RTR0PTR;
840 pVM->pgm.s.pRamRangesXRC = pNext ? pNext->pSelfRC : NIL_RTRCPTR;
841 }
842 ASMAtomicIncU32(&pVM->pgm.s.idRamRangesGen);
843
844 pgmR3PhysRebuildRamRangeSearchTrees(pVM);
845 pgmUnlock(pVM);
846}
847
848
849/**
850 * Unlink an existing RAM range from the list.
851 *
852 * @param pVM Pointer to the shared VM structure.
853 * @param pRam Pointer to the RAM range to unlink.
854 */
855static void pgmR3PhysUnlinkRamRange(PVM pVM, PPGMRAMRANGE pRam)
856{
857 pgmLock(pVM);
858
859 /* find prev. */
860 PPGMRAMRANGE pPrev = NULL;
861 PPGMRAMRANGE pCur = pVM->pgm.s.pRamRangesXR3;
862 while (pCur != pRam)
863 {
864 pPrev = pCur;
865 pCur = pCur->pNextR3;
866 }
867 AssertFatal(pCur);
868
869 pgmR3PhysUnlinkRamRange2(pVM, pRam, pPrev);
870 pgmUnlock(pVM);
871}
872
873
874/**
875 * Frees a range of pages, replacing them with ZERO pages of the specified type.
876 *
877 * @returns VBox status code.
878 * @param pVM The VM handle.
879 * @param pRam The RAM range in which the pages reside.
880 * @param GCPhys The address of the first page.
881 * @param GCPhysLast The address of the last page.
882 * @param uType The page type to replace them with.
883 */
884static int pgmR3PhysFreePageRange(PVM pVM, PPGMRAMRANGE pRam, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast, uint8_t uType)
885{
886 PGM_LOCK_ASSERT_OWNER(pVM);
887 uint32_t cPendingPages = 0;
888 PGMMFREEPAGESREQ pReq;
889 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
890 AssertLogRelRCReturn(rc, rc);
891
892 /* Iterate the pages. */
893 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
894 uint32_t cPagesLeft = ((GCPhysLast - GCPhys) >> PAGE_SHIFT) + 1;
895 while (cPagesLeft-- > 0)
896 {
897 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys);
898 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
899
900 PGM_PAGE_SET_TYPE(pVM, pPageDst, uType);
901
902 GCPhys += PAGE_SIZE;
903 pPageDst++;
904 }
905
906 if (cPendingPages)
907 {
908 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
909 AssertLogRelRCReturn(rc, rc);
910 }
911 GMMR3FreePagesCleanup(pReq);
912
913 return rc;
914}
915
916#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
917/**
918 * Rendezvous callback used by PGMR3ChangeMemBalloon that changes the memory balloon size
919 *
920 * This is only called on one of the EMTs while the other ones are waiting for
921 * it to complete this function.
922 *
923 * @returns VINF_SUCCESS (VBox strict status code).
924 * @param pVM The VM handle.
925 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
926 * @param pvUser User parameter
927 */
928static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysChangeMemBalloonRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
929{
930 uintptr_t *paUser = (uintptr_t *)pvUser;
931 bool fInflate = !!paUser[0];
932 unsigned cPages = paUser[1];
933 RTGCPHYS *paPhysPage = (RTGCPHYS *)paUser[2];
934 uint32_t cPendingPages = 0;
935 PGMMFREEPAGESREQ pReq;
936 int rc;
937
938 Log(("pgmR3PhysChangeMemBalloonRendezvous: %s %x pages\n", (fInflate) ? "inflate" : "deflate", cPages));
939 pgmLock(pVM);
940
941 if (fInflate)
942 {
943 /* Flush the PGM pool cache as we might have stale references to pages that we just freed. */
944 pgmR3PoolClearAllRendezvous(pVM, pVCpu, NULL);
945
946 /* Replace pages with ZERO pages. */
947 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
948 if (RT_FAILURE(rc))
949 {
950 pgmUnlock(pVM);
951 AssertLogRelRC(rc);
952 return rc;
953 }
954
955 /* Iterate the pages. */
956 for (unsigned i = 0; i < cPages; i++)
957 {
958 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
959 if ( pPage == NULL
960 || PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM)
961 {
962 Log(("pgmR3PhysChangeMemBalloonRendezvous: invalid physical page %RGp pPage->u3Type=%d\n", paPhysPage[i], pPage ? PGM_PAGE_GET_TYPE(pPage) : 0));
963 break;
964 }
965
966 LogFlow(("balloon page: %RGp\n", paPhysPage[i]));
967
968 /* Flush the shadow PT if this page was previously used as a guest page table. */
969 pgmPoolFlushPageByGCPhys(pVM, paPhysPage[i]);
970
971 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, paPhysPage[i]);
972 if (RT_FAILURE(rc))
973 {
974 pgmUnlock(pVM);
975 AssertLogRelRC(rc);
976 return rc;
977 }
978 Assert(PGM_PAGE_IS_ZERO(pPage));
979 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_BALLOONED);
980 }
981
982 if (cPendingPages)
983 {
984 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
985 if (RT_FAILURE(rc))
986 {
987 pgmUnlock(pVM);
988 AssertLogRelRC(rc);
989 return rc;
990 }
991 }
992 GMMR3FreePagesCleanup(pReq);
993 }
994 else
995 {
996 /* Iterate the pages. */
997 for (unsigned i = 0; i < cPages; i++)
998 {
999 PPGMPAGE pPage = pgmPhysGetPage(pVM, paPhysPage[i]);
1000 AssertBreak(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM);
1001
1002 LogFlow(("Free ballooned page: %RGp\n", paPhysPage[i]));
1003
1004 Assert(PGM_PAGE_IS_BALLOONED(pPage));
1005
1006 /* Change back to zero page. */
1007 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1008 }
1009
1010 /* Note that we currently do not map any ballooned pages in our shadow page tables, so no need to flush the pgm pool. */
1011 }
1012
1013 /* Notify GMM about the balloon change. */
1014 rc = GMMR3BalloonedPages(pVM, (fInflate) ? GMMBALLOONACTION_INFLATE : GMMBALLOONACTION_DEFLATE, cPages);
1015 if (RT_SUCCESS(rc))
1016 {
1017 if (!fInflate)
1018 {
1019 Assert(pVM->pgm.s.cBalloonedPages >= cPages);
1020 pVM->pgm.s.cBalloonedPages -= cPages;
1021 }
1022 else
1023 pVM->pgm.s.cBalloonedPages += cPages;
1024 }
1025
1026 pgmUnlock(pVM);
1027
1028 /* Flush the recompiler's TLB as well. */
1029 for (VMCPUID i = 0; i < pVM->cCpus; i++)
1030 CPUMSetChangedFlags(&pVM->aCpus[i], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
1031
1032 AssertLogRelRC(rc);
1033 return rc;
1034}
1035
1036/**
1037 * Inflates or deflates the memory balloon; deferred request worker for PGMR3PhysChangeMemBalloon.
1038 *
1039 * @returns VBox status code.
1040 * @param pVM The VM handle.
1041 * @param fInflate Inflate or deflate memory balloon
1042 * @param cPages Number of pages to free
1043 * @param paPhysPage Array of guest physical addresses
1044 */
1045static DECLCALLBACK(void) pgmR3PhysChangeMemBalloonHelper(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
1046{
1047 uintptr_t paUser[3];
1048
1049 paUser[0] = fInflate;
1050 paUser[1] = cPages;
1051 paUser[2] = (uintptr_t)paPhysPage;
1052 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
1053 AssertRC(rc);
1054
1055 /* Made a copy in PGMR3PhysFreeRamPages; free it here. */
1056 RTMemFree(paPhysPage);
1057}
1058#endif
1059
1060/**
1061 * Inflate or deflate a memory balloon
1062 *
1063 * @returns VBox status code.
1064 * @param pVM The VM handle.
1065 * @param fInflate Inflate or deflate memory balloon
1066 * @param cPages Number of pages to free
1067 * @param paPhysPage Array of guest physical addresses
1068 */
1069VMMR3DECL(int) PGMR3PhysChangeMemBalloon(PVM pVM, bool fInflate, unsigned cPages, RTGCPHYS *paPhysPage)
1070{
1071 /* This must match GMMR0Init; currently we only support memory ballooning on all 64-bit hosts except Mac OS X */
1072#if HC_ARCH_BITS == 64 && (defined(RT_OS_WINDOWS) || defined(RT_OS_SOLARIS) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD))
1073 int rc;
1074
1075 /* Older additions (ancient non-functioning balloon code) pass wrong physical addresses. */
1076 AssertReturn(!(paPhysPage[0] & 0xfff), VERR_INVALID_PARAMETER);
1077
1078 /* We own the IOM lock here and could cause a deadlock by waiting for another VCPU that is blocking on the IOM lock.
1079 * In the SMP case we post a request packet to postpone the job.
1080 */
1081 if (pVM->cCpus > 1)
1082 {
1083 unsigned cbPhysPage = cPages * sizeof(paPhysPage[0]);
1084 RTGCPHYS *paPhysPageCopy = (RTGCPHYS *)RTMemAlloc(cbPhysPage);
1085 AssertReturn(paPhysPageCopy, VERR_NO_MEMORY);
1086
1087 memcpy(paPhysPageCopy, paPhysPage, cbPhysPage);
1088
1089 rc = VMR3ReqCallNoWait(pVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysChangeMemBalloonHelper, 4, pVM, fInflate, cPages, paPhysPageCopy);
1090 AssertRC(rc);
1091 }
1092 else
1093 {
1094 uintptr_t paUser[3];
1095
1096 paUser[0] = fInflate;
1097 paUser[1] = cPages;
1098 paUser[2] = (uintptr_t)paPhysPage;
1099 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysChangeMemBalloonRendezvous, (void *)paUser);
1100 AssertRC(rc);
1101 }
1102 return rc;
1103#else
1104 return VERR_NOT_IMPLEMENTED;
1105#endif
1106}
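/*
 * Usage sketch (illustrative only; the helper name is made up and real requests
 * typically originate from the guest balloon driver): inflating the balloon
 * with a caller-supplied list of page-aligned guest physical addresses.
 */
static int pgmPhysExampleInflateBalloon(PVM pVM, RTGCPHYS *paPages, unsigned cPages)
{
    /* The addresses must be page aligned; see the parameter check above. */
    return PGMR3PhysChangeMemBalloon(pVM, true /*fInflate*/, cPages, paPages);
}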
1107
1108/**
1109 * Rendezvous callback used by PGMR3PhysWriteProtectRAM that write protects all
1110 * physical RAM.
1111 *
1112 * This is only called on one of the EMTs while the other ones are waiting for
1113 * it to complete this function.
1114 *
1115 * @returns VINF_SUCCESS (VBox strict status code).
1116 * @param pVM The VM handle.
1117 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1118 * @param pvUser User parameter, unused.
1119 */
1120static DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysWriteProtectRAMRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
1121{
1122 int rc = VINF_SUCCESS;
1123 NOREF(pvUser);
1124
1125 pgmLock(pVM);
1126#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1127 pgmPoolResetDirtyPages(pVM);
1128#endif
1129
1130 /** @todo pointless to write protect the physical page pointed to by RSP. */
1131
1132 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1133 pRam;
1134 pRam = pRam->CTX_SUFF(pNext))
1135 {
1136 uint32_t cPages = pRam->cb >> PAGE_SHIFT;
1137 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1138 {
1139 PPGMPAGE pPage = &pRam->aPages[iPage];
1140 PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
1141
1142 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
1143 || enmPageType == PGMPAGETYPE_MMIO2)
1144 {
1145 /*
1146 * A RAM page.
1147 */
1148 switch (PGM_PAGE_GET_STATE(pPage))
1149 {
1150 case PGM_PAGE_STATE_ALLOCATED:
1151 /** @todo Optimize this: Don't always re-enable write
1152 * monitoring if the page is known to be very busy. */
1153 if (PGM_PAGE_IS_WRITTEN_TO(pPage))
1154 {
1155 PGM_PAGE_CLEAR_WRITTEN_TO(pVM, pPage);
1156 /* Remember this dirty page for the next (memory) sync. */
1157 PGM_PAGE_SET_FT_DIRTY(pPage);
1158 }
1159
1160 pgmPhysPageWriteMonitor(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1161 break;
1162
1163 case PGM_PAGE_STATE_SHARED:
1164 AssertFailed();
1165 break;
1166
1167 case PGM_PAGE_STATE_WRITE_MONITORED: /* nothing to change. */
1168 default:
1169 break;
1170 }
1171 }
1172 }
1173 }
1174 pgmR3PoolWriteProtectPages(pVM);
1175 PGM_INVL_ALL_VCPU_TLBS(pVM);
1176 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1177 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
1178
1179 pgmUnlock(pVM);
1180 return rc;
1181}
1182
1183/**
1184 * Write protects all physical RAM to monitor writes.
1185 *
1186 * @returns VBox status code.
1187 * @param pVM The VM handle.
1188 */
1189VMMR3DECL(int) PGMR3PhysWriteProtectRAM(PVM pVM)
1190{
1191 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
1192
1193 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysWriteProtectRAMRendezvous, NULL);
1194 AssertRC(rc);
1195 return rc;
1196}
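/*
 * Usage sketch (illustrative only; the helper name is made up): kicking off
 * dirty-page tracking by write protecting all RAM. Must be called on an EMT,
 * as the assertion in PGMR3PhysWriteProtectRAM enforces.
 */
static int pgmPhysExampleStartWriteTracking(PVM pVM)
{
    int rc = PGMR3PhysWriteProtectRAM(pVM);
    LogRel(("example: write protected all RAM -> %Rrc\n", rc));
    return rc;
}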
1197
1198/**
1199 * Enumerate all dirty FT pages.
1200 *
1201 * @returns VBox status code.
1202 * @param pVM The VM handle.
1203 * @param pfnEnum Enumerate callback handler.
1204 * @param pvUser Enumerate callback handler parameter.
1205 */
1206VMMR3DECL(int) PGMR3PhysEnumDirtyFTPages(PVM pVM, PFNPGMENUMDIRTYFTPAGES pfnEnum, void *pvUser)
1207{
1208 int rc = VINF_SUCCESS;
1209
1210 pgmLock(pVM);
1211 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1212 pRam;
1213 pRam = pRam->CTX_SUFF(pNext))
1214 {
1215 uint32_t cPages = pRam->cb >> PAGE_SHIFT;
1216 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1217 {
1218 PPGMPAGE pPage = &pRam->aPages[iPage];
1219 PGMPAGETYPE enmPageType = (PGMPAGETYPE)PGM_PAGE_GET_TYPE(pPage);
1220
1221 if ( RT_LIKELY(enmPageType == PGMPAGETYPE_RAM)
1222 || enmPageType == PGMPAGETYPE_MMIO2)
1223 {
1224 /*
1225 * A RAM page.
1226 */
1227 switch (PGM_PAGE_GET_STATE(pPage))
1228 {
1229 case PGM_PAGE_STATE_ALLOCATED:
1230 case PGM_PAGE_STATE_WRITE_MONITORED:
1231 if ( !PGM_PAGE_IS_WRITTEN_TO(pPage) /* not very recently updated? */
1232 && PGM_PAGE_IS_FT_DIRTY(pPage))
1233 {
1234 unsigned cbPageRange = PAGE_SIZE;
1235 unsigned iPageClean = iPage + 1;
1236 RTGCPHYS GCPhysPage = pRam->GCPhys + iPage * PAGE_SIZE;
1237 uint8_t *pu8Page = NULL;
1238 PGMPAGEMAPLOCK Lock;
1239
1240 /* Find the next clean page, so we can merge adjacent dirty pages. */
1241 for (; iPageClean < cPages; iPageClean++)
1242 {
1243 PPGMPAGE pPageNext = &pRam->aPages[iPageClean];
1244 if ( RT_UNLIKELY(PGM_PAGE_GET_TYPE(pPageNext) != PGMPAGETYPE_RAM)
1245 || PGM_PAGE_GET_STATE(pPageNext) != PGM_PAGE_STATE_ALLOCATED
1246 || PGM_PAGE_IS_WRITTEN_TO(pPageNext)
1247 || !PGM_PAGE_IS_FT_DIRTY(pPageNext)
1248 /* Crossing a chunk boundary? */
1249 || (GCPhysPage & GMM_PAGEID_IDX_MASK) != ((GCPhysPage + cbPageRange) & GMM_PAGEID_IDX_MASK)
1250 )
1251 break;
1252
1253 cbPageRange += PAGE_SIZE;
1254 }
1255
1256 rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhysPage, (const void **)&pu8Page, &Lock);
1257 if (RT_SUCCESS(rc))
1258 {
1259 /** @todo this is risky; the range might be changed, but little choice as the sync
1260 * costs a lot of time. */
1261 pgmUnlock(pVM);
1262 pfnEnum(pVM, GCPhysPage, pu8Page, cbPageRange, pvUser);
1263 pgmLock(pVM);
1264 PGMPhysReleasePageMappingLock(pVM, &Lock);
1265 }
1266
1267 for (iPage; iPage < iPageClean; iPage++)
1268 PGM_PAGE_CLEAR_FT_DIRTY(&pRam->aPages[iPage]);
1269
1270 iPage = iPageClean - 1;
1271 }
1272 break;
1273 }
1274 }
1275 }
1276 }
1277 pgmUnlock(pVM);
1278 return rc;
1279}
1280
1281
1282/**
1283 * Gets the number of ram ranges.
1284 *
1285 * @returns Number of ram ranges. Returns UINT32_MAX if @a pVM is invalid.
1286 * @param pVM The VM handle.
1287 */
1288VMMR3DECL(uint32_t) PGMR3PhysGetRamRangeCount(PVM pVM)
1289{
1290 VM_ASSERT_VALID_EXT_RETURN(pVM, UINT32_MAX);
1291
1292 pgmLock(pVM);
1293 uint32_t cRamRanges = 0;
1294 for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext))
1295 cRamRanges++;
1296 pgmUnlock(pVM);
1297 return cRamRanges;
1298}
1299
1300
1301/**
1302 * Get information about a range.
1303 *
1304 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE.
1305 * @param pVM The VM handle
1306 * @param iRange The ordinal of the range.
1307 * @param pGCPhysStart Where to return the start of the range. Optional.
1308 * @param pGCPhysLast Where to return the address of the last byte in the
1309 * range. Optional.
1310 * @param pfIsMmio Where to indicate that this is a pure MMIO range.
1311 * Optional.
1312 */
1313VMMR3DECL(int) PGMR3PhysGetRange(PVM pVM, uint32_t iRange, PRTGCPHYS pGCPhysStart, PRTGCPHYS pGCPhysLast,
1314 const char **ppszDesc, bool *pfIsMmio)
1315{
1316 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1317
1318 pgmLock(pVM);
1319 uint32_t iCurRange = 0;
1320 for (PPGMRAMRANGE pCur = pVM->pgm.s.CTX_SUFF(pRamRangesX); pCur; pCur = pCur->CTX_SUFF(pNext), iCurRange++)
1321 if (iCurRange == iRange)
1322 {
1323 if (pGCPhysStart)
1324 *pGCPhysStart = pCur->GCPhys;
1325 if (pGCPhysLast)
1326 *pGCPhysLast = pCur->GCPhysLast;
1327 if (pfIsMmio)
1328 *pfIsMmio = !!(pCur->fFlags & PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO);
1329
1330 pgmUnlock(pVM);
1331 return VINF_SUCCESS;
1332 }
1333 pgmUnlock(pVM);
1334 return VERR_OUT_OF_RANGE;
1335}
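/*
 * Usage sketch (illustrative only; the helper name is made up): enumerating the
 * registered RAM ranges with the two informational APIs above.
 */
static void pgmPhysExampleDumpRanges(PVM pVM)
{
    uint32_t const cRanges = PGMR3PhysGetRamRangeCount(pVM);
    for (uint32_t iRange = 0; iRange < cRanges; iRange++)
    {
        RTGCPHYS GCPhysStart, GCPhysLast;
        bool     fIsMmio;
        int rc = PGMR3PhysGetRange(pVM, iRange, &GCPhysStart, &GCPhysLast, NULL /*ppszDesc*/, &fIsMmio);
        if (RT_SUCCESS(rc))
            LogRel(("example: range #%u %RGp-%RGp%s\n", iRange, GCPhysStart, GCPhysLast, fIsMmio ? " (MMIO)" : ""));
    }
}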
1336
1337
1338/**
1339 * Query the amount of free memory inside VMMR0
1340 *
1341 * @returns VBox status code.
1342 * @param pVM The VM handle.
1343 * @param pcbAllocMem Where to return the amount of memory allocated
1344 * by VMs.
1345 * @param pcbFreeMem Where to return the amount of memory that is
1346 * allocated from the host but not currently used
1347 * by any VMs.
1348 * @param pcbBallonedMem Where to return the sum of memory that is
1349 * currently ballooned by the VMs.
1350 * @param pcbSharedMem Where to return the amount of memory that is
1351 * currently shared.
1352 */
1353VMMR3DECL(int) PGMR3QueryGlobalMemoryStats(PVM pVM, uint64_t *pcbAllocMem, uint64_t *pcbFreeMem,
1354 uint64_t *pcbBallonedMem, uint64_t *pcbSharedMem)
1355{
1356 uint64_t cAllocPages = 0;
1357 uint64_t cFreePages = 0;
1358 uint64_t cBalloonPages = 0;
1359 uint64_t cSharedPages = 0;
1360 int rc = GMMR3QueryHypervisorMemoryStats(pVM, &cAllocPages, &cFreePages, &cBalloonPages, &cSharedPages);
1361 AssertRCReturn(rc, rc);
1362
1363 if (pcbAllocMem)
1364 *pcbAllocMem = cAllocPages * _4K;
1365
1366 if (pcbFreeMem)
1367 *pcbFreeMem = cFreePages * _4K;
1368
1369 if (pcbBallonedMem)
1370 *pcbBallonedMem = cBalloonPages * _4K;
1371
1372 if (pcbSharedMem)
1373 *pcbSharedMem = cSharedPages * _4K;
1374
1375 Log(("PGMR3QueryVMMMemoryStats: all=%llx free=%llx ballooned=%llx shared=%llx\n",
1376 cAllocPages, cFreePages, cBalloonPages, cSharedPages));
1377 return VINF_SUCCESS;
1378}
1379
1380
1381/**
1382 * Query memory stats for the VM.
1383 *
1384 * @returns VBox status code.
1385 * @param pVM The VM handle.
1386 * @param pcbTotalMem Where to return total amount memory the VM may
1387 * possibly use.
1388 * @param pcbPrivateMem Where to return the amount of private memory
1389 * currently allocated.
1390 * @param pcbSharedMem Where to return the amount of actually shared
1391 * memory currently used by the VM.
1392 * @param pcbZeroMem Where to return the amount of memory backed by
1393 * zero pages.
1394 *
1395 * @remarks The total mem is normally larger than the sum of the three
1396 * components. There are two reasons for this: first, the amount of
1397 * shared memory is what we're sure is shared instead of what could
1398 * possibly be shared with someone. Secondly, the total may
1399 * include some pure MMIO pages that don't go into any of the three
1400 * sub-counts.
1401 *
1402 * @todo Why do we return reused shared pages instead of anything that could
1403 * potentially be shared? Doesn't this mean the first VM gets a much
1404 * lower number of shared pages?
1405 */
1406VMMR3DECL(int) PGMR3QueryMemoryStats(PVM pVM, uint64_t *pcbTotalMem, uint64_t *pcbPrivateMem,
1407 uint64_t *pcbSharedMem, uint64_t *pcbZeroMem)
1408{
1409 VM_ASSERT_VALID_EXT_RETURN(pVM, VERR_INVALID_VM_HANDLE);
1410
1411 if (pcbTotalMem)
1412 *pcbTotalMem = (uint64_t)pVM->pgm.s.cAllPages * PAGE_SIZE;
1413
1414 if (pcbPrivateMem)
1415 *pcbPrivateMem = (uint64_t)pVM->pgm.s.cPrivatePages * PAGE_SIZE;
1416
1417 if (pcbSharedMem)
1418 *pcbSharedMem = (uint64_t)pVM->pgm.s.cReusedSharedPages * PAGE_SIZE;
1419
1420 if (pcbZeroMem)
1421 *pcbZeroMem = (uint64_t)pVM->pgm.s.cZeroPages * PAGE_SIZE;
1422
1423 Log(("PGMR3QueryMemoryStats: all=%x private=%x reused=%x zero=%x\n", pVM->pgm.s.cAllPages, pVM->pgm.s.cPrivatePages, pVM->pgm.s.cReusedSharedPages, pVM->pgm.s.cZeroPages));
1424 return VINF_SUCCESS;
1425}
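/*
 * Usage sketch (illustrative only; the helper name is made up): logging both
 * the global (GMM) and the per-VM memory statistics. All byte counts are
 * derived from page counts as shown in the two functions above.
 */
static void pgmPhysExampleLogMemStats(PVM pVM)
{
    uint64_t cbAlloc = 0, cbFree = 0, cbBallooned = 0, cbShared = 0;
    if (RT_SUCCESS(PGMR3QueryGlobalMemoryStats(pVM, &cbAlloc, &cbFree, &cbBallooned, &cbShared)))
        LogRel(("example: global alloc=%RU64 free=%RU64 ballooned=%RU64 shared=%RU64 (bytes)\n",
                cbAlloc, cbFree, cbBallooned, cbShared));

    uint64_t cbTotal = 0, cbPrivate = 0, cbSharedVM = 0, cbZero = 0;
    if (RT_SUCCESS(PGMR3QueryMemoryStats(pVM, &cbTotal, &cbPrivate, &cbSharedVM, &cbZero)))
        LogRel(("example: VM total=%RU64 private=%RU64 shared=%RU64 zero=%RU64 (bytes)\n",
                cbTotal, cbPrivate, cbSharedVM, cbZero));
}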
1426
1427
1428/**
1429 * PGMR3PhysRegisterRam worker that initializes and links a RAM range.
1430 *
1431 * @param pVM The VM handle.
1432 * @param pNew The new RAM range.
1433 * @param GCPhys The address of the RAM range.
1434 * @param GCPhysLast The last address of the RAM range.
1435 * @param RCPtrNew The RC address if the range is floating. NIL_RTRCPTR
1436 * if in HMA.
1437 * @param R0PtrNew Ditto for R0.
1438 * @param pszDesc The description.
1439 * @param pPrev The previous RAM range (for linking).
1440 */
1441static void pgmR3PhysInitAndLinkRamRange(PVM pVM, PPGMRAMRANGE pNew, RTGCPHYS GCPhys, RTGCPHYS GCPhysLast,
1442 RTRCPTR RCPtrNew, RTR0PTR R0PtrNew, const char *pszDesc, PPGMRAMRANGE pPrev)
1443{
1444 /*
1445 * Initialize the range.
1446 */
1447 pNew->pSelfR0 = R0PtrNew != NIL_RTR0PTR ? R0PtrNew : MMHyperCCToR0(pVM, pNew);
1448 pNew->pSelfRC = RCPtrNew != NIL_RTRCPTR ? RCPtrNew : MMHyperCCToRC(pVM, pNew);
1449 pNew->GCPhys = GCPhys;
1450 pNew->GCPhysLast = GCPhysLast;
1451 pNew->cb = GCPhysLast - GCPhys + 1;
1452 pNew->pszDesc = pszDesc;
1453 pNew->fFlags = RCPtrNew != NIL_RTRCPTR ? PGM_RAM_RANGE_FLAGS_FLOATING : 0;
1454 pNew->pvR3 = NULL;
1455 pNew->paLSPages = NULL;
1456
1457 uint32_t const cPages = pNew->cb >> PAGE_SHIFT;
1458 RTGCPHYS iPage = cPages;
1459 while (iPage-- > 0)
1460 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_RAM);
1461
1462 /* Update the page count stats. */
1463 pVM->pgm.s.cZeroPages += cPages;
1464 pVM->pgm.s.cAllPages += cPages;
1465
1466 /*
1467 * Link it.
1468 */
1469 pgmR3PhysLinkRamRange(pVM, pNew, pPrev);
1470}
1471
1472
1473/**
1474 * Relocate a floating RAM range.
1475 *
1476 * @copydoc FNPGMRELOCATE.
1477 */
1478static DECLCALLBACK(bool) pgmR3PhysRamRangeRelocate(PVM pVM, RTGCPTR GCPtrOld, RTGCPTR GCPtrNew, PGMRELOCATECALL enmMode, void *pvUser)
1479{
1480 PPGMRAMRANGE pRam = (PPGMRAMRANGE)pvUser;
1481 Assert(pRam->fFlags & PGM_RAM_RANGE_FLAGS_FLOATING);
1482 Assert(pRam->pSelfRC == GCPtrOld + PAGE_SIZE);
1483
1484 switch (enmMode)
1485 {
1486 case PGMRELOCATECALL_SUGGEST:
1487 return true;
1488
1489 case PGMRELOCATECALL_RELOCATE:
1490 {
1491 /*
1492 * Update myself, then relink all the ranges and flush the RC TLB.
1493 */
1494 pgmLock(pVM);
1495
1496 pRam->pSelfRC = (RTRCPTR)(GCPtrNew + PAGE_SIZE);
1497
1498 pgmR3PhysRelinkRamRanges(pVM);
1499 for (unsigned i = 0; i < PGM_RAMRANGE_TLB_ENTRIES; i++)
1500 pVM->pgm.s.apRamRangesTlbRC[i] = NIL_RTRCPTR;
1501
1502 pgmUnlock(pVM);
1503 return true;
1504 }
1505
1506 default:
1507 AssertFailedReturn(false);
1508 }
1509}
1510
1511
1512/**
1513 * PGMR3PhysRegisterRam worker that registers a high chunk.
1514 *
1515 * @returns VBox status code.
1516 * @param pVM The VM handle.
1517 * @param GCPhys The address of the RAM.
1518 * @param cRamPages The number of RAM pages to register.
1519 * @param cbChunk The size of the PGMRAMRANGE guest mapping.
1520 * @param iChunk The chunk number.
1521 * @param pszDesc The RAM range description.
1522 * @param ppPrev Previous RAM range pointer. In/Out.
1523 */
1524static int pgmR3PhysRegisterHighRamChunk(PVM pVM, RTGCPHYS GCPhys, uint32_t cRamPages,
1525 uint32_t cbChunk, uint32_t iChunk, const char *pszDesc,
1526 PPGMRAMRANGE *ppPrev)
1527{
1528 const char *pszDescChunk = iChunk == 0
1529 ? pszDesc
1530 : MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s (#%u)", pszDesc, iChunk + 1);
1531 AssertReturn(pszDescChunk, VERR_NO_MEMORY);
1532
1533 /*
1534 * Allocate memory for the new chunk.
1535 */
1536 size_t const cChunkPages = RT_ALIGN_Z(RT_UOFFSETOF(PGMRAMRANGE, aPages[cRamPages]), PAGE_SIZE) >> PAGE_SHIFT;
1537 PSUPPAGE paChunkPages = (PSUPPAGE)RTMemTmpAllocZ(sizeof(SUPPAGE) * cChunkPages);
1538 AssertReturn(paChunkPages, VERR_NO_TMP_MEMORY);
1539 RTR0PTR R0PtrChunk = NIL_RTR0PTR;
1540 void *pvChunk = NULL;
1541 int rc = SUPR3PageAllocEx(cChunkPages, 0 /*fFlags*/, &pvChunk,
1542#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
1543 VMMIsHwVirtExtForced(pVM) ? &R0PtrChunk : NULL,
1544#else
1545 NULL,
1546#endif
1547 paChunkPages);
1548 if (RT_SUCCESS(rc))
1549 {
1550#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
1551 if (!VMMIsHwVirtExtForced(pVM))
1552 R0PtrChunk = NIL_RTR0PTR;
1553#else
1554 R0PtrChunk = (uintptr_t)pvChunk;
1555#endif
1556 memset(pvChunk, 0, cChunkPages << PAGE_SHIFT);
1557
1558 PPGMRAMRANGE pNew = (PPGMRAMRANGE)pvChunk;
1559
1560 /*
1561 * Create a mapping and map the pages into it.
1562 * We push these in below the HMA.
1563 */
1564 RTGCPTR GCPtrChunkMap = pVM->pgm.s.GCPtrPrevRamRangeMapping - cbChunk;
1565 rc = PGMR3MapPT(pVM, GCPtrChunkMap, cbChunk, 0 /*fFlags*/, pgmR3PhysRamRangeRelocate, pNew, pszDescChunk);
1566 if (RT_SUCCESS(rc))
1567 {
1568 pVM->pgm.s.GCPtrPrevRamRangeMapping = GCPtrChunkMap;
1569
1570 RTGCPTR const GCPtrChunk = GCPtrChunkMap + PAGE_SIZE;
1571 RTGCPTR GCPtrPage = GCPtrChunk;
1572 for (uint32_t iPage = 0; iPage < cChunkPages && RT_SUCCESS(rc); iPage++, GCPtrPage += PAGE_SIZE)
1573 rc = PGMMap(pVM, GCPtrPage, paChunkPages[iPage].Phys, PAGE_SIZE, 0);
1574 if (RT_SUCCESS(rc))
1575 {
1576 /*
1577 * Ok, init and link the range.
1578 */
1579 pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhys + ((RTGCPHYS)cRamPages << PAGE_SHIFT) - 1,
1580 (RTRCPTR)GCPtrChunk, R0PtrChunk, pszDescChunk, *ppPrev);
1581 *ppPrev = pNew;
1582 }
1583 }
1584
1585 if (RT_FAILURE(rc))
1586 SUPR3PageFreeEx(pvChunk, cChunkPages);
1587 }
1588
1589 RTMemTmpFree(paChunkPages);
1590 return rc;
1591}
1592
1593
1594/**
1595 * Sets up a RAM range.
1596 *
1597 * This will check for conflicting registrations, make a resource
1598 * reservation for the memory (with GMM), and set up the per-page
1599 * tracking structures (PGMPAGE).
1600 *
1601 * @returns VBox status code.
1602 * @param pVM Pointer to the shared VM structure.
1603 * @param GCPhys The physical address of the RAM.
1604 * @param cb The size of the RAM.
1605 * @param pszDesc The description - not copied, so, don't free or change it.
1606 */
1607VMMR3DECL(int) PGMR3PhysRegisterRam(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, const char *pszDesc)
1608{
1609 /*
1610 * Validate input.
1611 */
1612 Log(("PGMR3PhysRegisterRam: GCPhys=%RGp cb=%RGp pszDesc=%s\n", GCPhys, cb, pszDesc));
1613 AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
1614 AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
1615 AssertReturn(cb > 0, VERR_INVALID_PARAMETER);
1616 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
1617 AssertMsgReturn(GCPhysLast > GCPhys, ("The range wraps! GCPhys=%RGp cb=%RGp\n", GCPhys, cb), VERR_INVALID_PARAMETER);
1618 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
1619 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
1620
1621 pgmLock(pVM);
1622
1623 /*
1624 * Find range location and check for conflicts.
1625 * (The PGM lock is taken above, so the list walk is safe.)
1626 */
1627 PPGMRAMRANGE pPrev = NULL;
1628 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
1629 while (pRam && GCPhysLast >= pRam->GCPhys)
1630 {
1631 if ( GCPhysLast >= pRam->GCPhys
1632 && GCPhys <= pRam->GCPhysLast)
1633 AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
1634 GCPhys, GCPhysLast, pszDesc,
1635 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
1636 VERR_PGM_RAM_CONFLICT);
1637
1638 /* next */
1639 pPrev = pRam;
1640 pRam = pRam->pNextR3;
1641 }
1642
1643 /*
1644 * Register it with GMM (the API bitches).
1645 */
1646 const RTGCPHYS cPages = cb >> PAGE_SHIFT;
1647 int rc = MMR3IncreaseBaseReservation(pVM, cPages);
1648 if (RT_FAILURE(rc))
1649 {
1650 pgmUnlock(pVM);
1651 return rc;
1652 }
1653
1654 if ( GCPhys >= _4G
1655 && cPages > 256)
1656 {
1657 /*
1658 * The PGMRAMRANGE structures for the high memory can get very big.
1659 * In order to avoid SUPR3PageAllocEx allocation failures due to the
1660 * allocation size limit there and also to avoid being unable to find
1661 * guest mapping space for them, we split this memory up into 4MB in
1662 * (potential) raw-mode configs and 16MB chunks in forced AMD-V/VT-x
1663 * mode.
1664 *
1665 * The first and last page of each mapping are guard pages and marked
1666 * not-present. So, we've got 4186112 and 16769024 bytes available for
1667 * the PGMRAMRANGE structure.
1668 *
1669 * Note! The sizes used here will influence the saved state.
1670 */
1671 uint32_t cbChunk;
1672 uint32_t cPagesPerChunk;
1673 if (VMMIsHwVirtExtForced(pVM))
1674 {
1675 cbChunk = 16U*_1M;
1676 cPagesPerChunk = 1048048; /* max ~1048059 */
1677 AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 1048048 < 16U*_1M - PAGE_SIZE * 2);
1678 }
1679 else
1680 {
1681 cbChunk = 4U*_1M;
1682 cPagesPerChunk = 261616; /* max ~261627 */
1683 AssertCompile(sizeof(PGMRAMRANGE) + sizeof(PGMPAGE) * 261616 < 4U*_1M - PAGE_SIZE * 2);
1684 }
1685 AssertRelease(RT_UOFFSETOF(PGMRAMRANGE, aPages[cPagesPerChunk]) + PAGE_SIZE * 2 <= cbChunk);
1686
1687 RTGCPHYS cPagesLeft = cPages;
1688 RTGCPHYS GCPhysChunk = GCPhys;
1689 uint32_t iChunk = 0;
1690 while (cPagesLeft > 0)
1691 {
1692 uint32_t cPagesInChunk = cPagesLeft;
1693 if (cPagesInChunk > cPagesPerChunk)
1694 cPagesInChunk = cPagesPerChunk;
1695
1696 rc = pgmR3PhysRegisterHighRamChunk(pVM, GCPhysChunk, cPagesInChunk, cbChunk, iChunk, pszDesc, &pPrev);
1697 AssertRCReturn(rc, rc);
1698
1699 /* advance */
1700 GCPhysChunk += (RTGCPHYS)cPagesInChunk << PAGE_SHIFT;
1701 cPagesLeft -= cPagesInChunk;
1702 iChunk++;
1703 }
1704 }
1705 else
1706 {
1707 /*
1708 * Allocate, initialize and link the new RAM range.
1709 */
1710 const size_t cbRamRange = RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]);
1711 PPGMRAMRANGE pNew;
1712 rc = MMR3HyperAllocOnceNoRel(pVM, cbRamRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew);
1713 AssertLogRelMsgRCReturn(rc, ("cbRamRange=%zu\n", cbRamRange), rc);
1714
1715 pgmR3PhysInitAndLinkRamRange(pVM, pNew, GCPhys, GCPhysLast, NIL_RTRCPTR, NIL_RTR0PTR, pszDesc, pPrev);
1716 }
1717 pgmPhysInvalidatePageMapTLB(pVM);
1718 pgmUnlock(pVM);
1719
1720 /*
1721 * Notify REM.
1722 */
1723 REMR3NotifyPhysRamRegister(pVM, GCPhys, cb, REM_NOTIFY_PHYS_RAM_FLAGS_RAM);
1724
1725 return VINF_SUCCESS;
1726}
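/*
 * Usage sketch (illustrative only; the size and description are made up and the
 * real caller is typically the VM init code): registering a block of base RAM.
 * Must be called on the EMT, and the range must be page aligned.
 */
static int pgmPhysExampleRegisterBaseRam(PVM pVM)
{
    RTGCPHYS const cb = 128 * _1M;      /* page aligned by construction */
    return PGMR3PhysRegisterRam(pVM, 0 /*GCPhys*/, cb, "Example Base RAM");
}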
1727
1728
1729/**
1730 * Worker called by PGMR3InitFinalize if we're configured to pre-allocate RAM.
1731 *
1732 * We do this late in the init process so that all the ROM and MMIO ranges have
1733 * been registered already and we don't go wasting memory on them.
1734 *
1735 * @returns VBox status code.
1736 *
1737 * @param pVM Pointer to the shared VM structure.
1738 */
1739int pgmR3PhysRamPreAllocate(PVM pVM)
1740{
1741 Assert(pVM->pgm.s.fRamPreAlloc);
1742 Log(("pgmR3PhysRamPreAllocate: enter\n"));
1743
1744 /*
1745 * Walk the RAM ranges and allocate all RAM pages, halt at
1746 * the first allocation error.
1747 */
1748 uint64_t cPages = 0;
1749 uint64_t NanoTS = RTTimeNanoTS();
1750 pgmLock(pVM);
1751 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
1752 {
1753 PPGMPAGE pPage = &pRam->aPages[0];
1754 RTGCPHYS GCPhys = pRam->GCPhys;
1755 uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
1756 while (cLeft-- > 0)
1757 {
1758 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1759 {
1760 switch (PGM_PAGE_GET_STATE(pPage))
1761 {
1762 case PGM_PAGE_STATE_ZERO:
1763 {
1764 int rc = pgmPhysAllocPage(pVM, pPage, GCPhys);
1765 if (RT_FAILURE(rc))
1766 {
1767 LogRel(("PGM: RAM Pre-allocation failed at %RGp (in %s) with rc=%Rrc\n", GCPhys, pRam->pszDesc, rc));
1768 pgmUnlock(pVM);
1769 return rc;
1770 }
1771 cPages++;
1772 break;
1773 }
1774
1775 case PGM_PAGE_STATE_BALLOONED:
1776 case PGM_PAGE_STATE_ALLOCATED:
1777 case PGM_PAGE_STATE_WRITE_MONITORED:
1778 case PGM_PAGE_STATE_SHARED:
1779 /* nothing to do here. */
1780 break;
1781 }
1782 }
1783
1784 /* next */
1785 pPage++;
1786 GCPhys += PAGE_SIZE;
1787 }
1788 }
1789 pgmUnlock(pVM);
1790 NanoTS = RTTimeNanoTS() - NanoTS;
1791
1792 LogRel(("PGM: Pre-allocated %llu pages in %llu ms\n", cPages, NanoTS / 1000000));
1793 Log(("pgmR3PhysRamPreAllocate: returns VINF_SUCCESS\n"));
1794 return VINF_SUCCESS;
1795}
1796
1797
1798/**
1799 * Resets (zeros) the RAM.
1800 *
1801 * ASSUMES that the caller owns the PGM lock.
1802 *
1803 * @returns VBox status code.
1804 * @param pVM Pointer to the shared VM structure.
1805 */
1806int pgmR3PhysRamReset(PVM pVM)
1807{
1808 PGM_LOCK_ASSERT_OWNER(pVM);
1809
1810 /* Reset the memory balloon. */
1811 int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
1812 AssertRC(rc);
1813
1814#ifdef VBOX_WITH_PAGE_SHARING
1815 /* Clear all registered shared modules. */
1816 rc = GMMR3ResetSharedModules(pVM);
1817 AssertRC(rc);
1818#endif
1819 /* Reset counters. */
1820 pVM->pgm.s.cReusedSharedPages = 0;
1821 pVM->pgm.s.cBalloonedPages = 0;
1822
1823 /*
1824 * We batch up pages that should be freed instead of calling GMM for
1825 * each and every one of them.
1826 */
1827 uint32_t cPendingPages = 0;
1828 PGMMFREEPAGESREQ pReq;
1829 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1830 AssertLogRelRCReturn(rc, rc);
1831
1832 /*
1833 * Walk the ram ranges.
1834 */
1835 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
1836 {
1837 uint32_t iPage = pRam->cb >> PAGE_SHIFT;
1838 AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb));
1839
1840 if (!pVM->pgm.s.fRamPreAlloc)
1841 {
1842 /* Replace all RAM pages by ZERO pages. */
1843 while (iPage-- > 0)
1844 {
1845 PPGMPAGE pPage = &pRam->aPages[iPage];
1846 switch (PGM_PAGE_GET_TYPE(pPage))
1847 {
1848 case PGMPAGETYPE_RAM:
 1849                         /* Do not replace pages that are part of a 2 MB contiguous range
1850 with zero pages, but zero them instead. */
1851 if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE
1852 || PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
1853 {
1854 void *pvPage;
1855 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage);
1856 AssertLogRelRCReturn(rc, rc);
1857 ASMMemZeroPage(pvPage);
1858 }
1859 else if (PGM_PAGE_IS_BALLOONED(pPage))
1860 {
1861 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
1862 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1863 }
1864 else if (!PGM_PAGE_IS_ZERO(pPage))
1865 {
1866 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1867 AssertLogRelRCReturn(rc, rc);
1868 }
1869 break;
1870
1871 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
1872 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT),
1873 true /*fDoAccounting*/);
1874 break;
1875
1876 case PGMPAGETYPE_MMIO2:
1877 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
1878 case PGMPAGETYPE_ROM:
1879 case PGMPAGETYPE_MMIO:
1880 break;
1881 default:
1882 AssertFailed();
1883 }
1884 } /* for each page */
1885 }
1886 else
1887 {
1888 /* Zero the memory. */
1889 while (iPage-- > 0)
1890 {
1891 PPGMPAGE pPage = &pRam->aPages[iPage];
1892 switch (PGM_PAGE_GET_TYPE(pPage))
1893 {
1894 case PGMPAGETYPE_RAM:
1895 switch (PGM_PAGE_GET_STATE(pPage))
1896 {
1897 case PGM_PAGE_STATE_ZERO:
1898 break;
1899
1900 case PGM_PAGE_STATE_BALLOONED:
1901 /* Turn into a zero page; the balloon status is lost when the VM reboots. */
1902 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
1903 break;
1904
1905 case PGM_PAGE_STATE_SHARED:
1906 case PGM_PAGE_STATE_WRITE_MONITORED:
1907 rc = pgmPhysPageMakeWritable(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
1908 AssertLogRelRCReturn(rc, rc);
1909 /* no break */
1910
1911 case PGM_PAGE_STATE_ALLOCATED:
1912 {
1913 void *pvPage;
1914 rc = pgmPhysPageMap(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pvPage);
1915 AssertLogRelRCReturn(rc, rc);
1916 ASMMemZeroPage(pvPage);
1917 break;
1918 }
1919 }
1920 break;
1921
1922 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
1923 pgmHandlerPhysicalResetAliasedPage(pVM, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT),
1924 true /*fDoAccounting*/);
1925 break;
1926
1927 case PGMPAGETYPE_MMIO2:
1928 case PGMPAGETYPE_ROM_SHADOW:
1929 case PGMPAGETYPE_ROM:
1930 case PGMPAGETYPE_MMIO:
1931 break;
1932 default:
1933 AssertFailed();
1934
1935 }
1936 } /* for each page */
1937 }
1938
1939 }
1940
1941 /*
1942 * Finish off any pages pending freeing.
1943 */
1944 if (cPendingPages)
1945 {
1946 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
1947 AssertLogRelRCReturn(rc, rc);
1948 }
1949 GMMR3FreePagesCleanup(pReq);
1950
1951 return VINF_SUCCESS;
1952}
1953
1954/**
 1955 * Frees all RAM during VM termination.
1956 *
1957 * ASSUMES that the caller owns the PGM lock.
1958 *
1959 * @returns VBox status code.
1960 * @param pVM Pointer to the shared VM structure.
1961 */
1962int pgmR3PhysRamTerm(PVM pVM)
1963{
1964 PGM_LOCK_ASSERT_OWNER(pVM);
1965
1966 /* Reset the memory balloon. */
1967 int rc = GMMR3BalloonedPages(pVM, GMMBALLOONACTION_RESET, 0);
1968 AssertRC(rc);
1969
1970#ifdef VBOX_WITH_PAGE_SHARING
1971 /* Clear all registered shared modules. */
1972 rc = GMMR3ResetSharedModules(pVM);
1973 AssertRC(rc);
1974#endif
1975
1976 /*
1977 * We batch up pages that should be freed instead of calling GMM for
1978 * each and every one of them.
1979 */
1980 uint32_t cPendingPages = 0;
1981 PGMMFREEPAGESREQ pReq;
1982 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
1983 AssertLogRelRCReturn(rc, rc);
1984
1985 /*
1986 * Walk the ram ranges.
1987 */
1988 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3; pRam; pRam = pRam->pNextR3)
1989 {
1990 uint32_t iPage = pRam->cb >> PAGE_SHIFT;
1991 AssertMsg(((RTGCPHYS)iPage << PAGE_SHIFT) == pRam->cb, ("%RGp %RGp\n", (RTGCPHYS)iPage << PAGE_SHIFT, pRam->cb));
1992
1993 /* Replace all RAM pages by ZERO pages. */
1994 while (iPage-- > 0)
1995 {
1996 PPGMPAGE pPage = &pRam->aPages[iPage];
1997 switch (PGM_PAGE_GET_TYPE(pPage))
1998 {
1999 case PGMPAGETYPE_RAM:
2000 /* Free all shared pages. Private pages are automatically freed during GMM VM cleanup. */
2001 if (PGM_PAGE_IS_SHARED(pPage))
2002 {
2003 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPage, pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT));
2004 AssertLogRelRCReturn(rc, rc);
2005 }
2006 break;
2007
2008 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
2009 case PGMPAGETYPE_MMIO2:
2010 case PGMPAGETYPE_ROM_SHADOW: /* handled by pgmR3PhysRomReset. */
2011 case PGMPAGETYPE_ROM:
2012 case PGMPAGETYPE_MMIO:
2013 break;
2014 default:
2015 AssertFailed();
2016 }
2017 } /* for each page */
2018 }
2019
2020 /*
2021 * Finish off any pages pending freeing.
2022 */
2023 if (cPendingPages)
2024 {
2025 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2026 AssertLogRelRCReturn(rc, rc);
2027 }
2028 GMMR3FreePagesCleanup(pReq);
2029 return VINF_SUCCESS;
2030}
2031
2032/**
2033 * This is the interface IOM is using to register an MMIO region.
2034 *
2035 * It will check for conflicts and ensure that a RAM range structure
2036 * is present before calling the PGMR3HandlerPhysicalRegister API to
2037 * register the callbacks.
2038 *
2039 * @returns VBox status code.
2040 *
2041 * @param pVM Pointer to the shared VM structure.
2042 * @param GCPhys The start of the MMIO region.
2043 * @param cb The size of the MMIO region.
2044 * @param pfnHandlerR3 The address of the ring-3 handler. (IOMR3MMIOHandler)
2045 * @param pvUserR3 The user argument for R3.
2046 * @param pfnHandlerR0 The address of the ring-0 handler. (IOMMMIOHandler)
2047 * @param pvUserR0 The user argument for R0.
2048 * @param pfnHandlerRC The address of the RC handler. (IOMMMIOHandler)
2049 * @param pvUserRC The user argument for RC.
2050 * @param pszDesc The description of the MMIO region.
2051 */
2052VMMR3DECL(int) PGMR3PhysMMIORegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb,
2053 R3PTRTYPE(PFNPGMR3PHYSHANDLER) pfnHandlerR3, RTR3PTR pvUserR3,
2054 R0PTRTYPE(PFNPGMR0PHYSHANDLER) pfnHandlerR0, RTR0PTR pvUserR0,
2055 RCPTRTYPE(PFNPGMRCPHYSHANDLER) pfnHandlerRC, RTRCPTR pvUserRC,
2056 R3PTRTYPE(const char *) pszDesc)
2057{
2058 /*
 2059     * Assert on some assumptions.
2060 */
2061 VM_ASSERT_EMT(pVM);
2062 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2063 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2064 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2065 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
2066
2067 int rc = pgmLock(pVM);
2068 AssertRCReturn(rc, rc);
2069
2070 /*
2071 * Make sure there's a RAM range structure for the region.
2072 */
2073 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
2074 bool fRamExists = false;
2075 PPGMRAMRANGE pRamPrev = NULL;
2076 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2077 while (pRam && GCPhysLast >= pRam->GCPhys)
2078 {
2079 if ( GCPhysLast >= pRam->GCPhys
2080 && GCPhys <= pRam->GCPhysLast)
2081 {
2082 /* Simplification: all within the same range. */
2083 AssertLogRelMsgReturnStmt( GCPhys >= pRam->GCPhys
2084 && GCPhysLast <= pRam->GCPhysLast,
2085 ("%RGp-%RGp (MMIO/%s) falls partly outside %RGp-%RGp (%s)\n",
2086 GCPhys, GCPhysLast, pszDesc,
2087 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
2088 pgmUnlock(pVM),
2089 VERR_PGM_RAM_CONFLICT);
2090
2091 /* Check that it's all RAM or MMIO pages. */
2092 PCPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2093 uint32_t cLeft = cb >> PAGE_SHIFT;
2094 while (cLeft-- > 0)
2095 {
2096 AssertLogRelMsgReturnStmt( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2097 || PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO,
2098 ("%RGp-%RGp (MMIO/%s): %RGp is not a RAM or MMIO page - type=%d desc=%s\n",
2099 GCPhys, GCPhysLast, pszDesc, PGM_PAGE_GET_TYPE(pPage), pRam->pszDesc),
2100 pgmUnlock(pVM),
2101 VERR_PGM_RAM_CONFLICT);
2102 pPage++;
2103 }
2104
2105 /* Looks good. */
2106 fRamExists = true;
2107 break;
2108 }
2109
2110 /* next */
2111 pRamPrev = pRam;
2112 pRam = pRam->pNextR3;
2113 }
2114 PPGMRAMRANGE pNew;
2115 if (fRamExists)
2116 {
2117 pNew = NULL;
2118
2119 /*
2120 * Make all the pages in the range MMIO/ZERO pages, freeing any
2121 * RAM pages currently mapped here. This might not be 100% correct
2122 * for PCI memory, but we're doing the same thing for MMIO2 pages.
2123 */
2124 rc = pgmR3PhysFreePageRange(pVM, pRam, GCPhys, GCPhysLast, PGMPAGETYPE_MMIO);
2125 AssertRCReturnStmt(rc, pgmUnlock(pVM), rc);
2126
2127 /* Force a PGM pool flush as guest ram references have been changed. */
2128 /** @todo not entirely SMP safe; assuming for now the guest takes
 2129          *        care of this internally (not touching mapped MMIO while changing the
2130 * mapping). */
2131 PVMCPU pVCpu = VMMGetCpu(pVM);
2132 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2133 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2134 }
2135 else
2136 {
2137
2138 /*
2139 * No RAM range, insert an ad hoc one.
2140 *
2141 * Note that we don't have to tell REM about this range because
2142 * PGMHandlerPhysicalRegisterEx will do that for us.
2143 */
2144 Log(("PGMR3PhysMMIORegister: Adding ad hoc MMIO range for %RGp-%RGp %s\n", GCPhys, GCPhysLast, pszDesc));
2145
2146 const uint32_t cPages = cb >> PAGE_SHIFT;
2147 const size_t cbRamRange = RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]);
 2148         rc = MMHyperAlloc(pVM, cbRamRange, 16, MM_TAG_PGM_PHYS, (void **)&pNew);
2149 AssertLogRelMsgRCReturnStmt(rc, ("cbRamRange=%zu\n", cbRamRange), pgmUnlock(pVM), rc);
2150
2151 /* Initialize the range. */
2152 pNew->pSelfR0 = MMHyperCCToR0(pVM, pNew);
2153 pNew->pSelfRC = MMHyperCCToRC(pVM, pNew);
2154 pNew->GCPhys = GCPhys;
2155 pNew->GCPhysLast = GCPhysLast;
2156 pNew->cb = cb;
2157 pNew->pszDesc = pszDesc;
2158 pNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO;
2159 pNew->pvR3 = NULL;
2160 pNew->paLSPages = NULL;
2161
2162 uint32_t iPage = cPages;
2163 while (iPage-- > 0)
2164 PGM_PAGE_INIT_ZERO(&pNew->aPages[iPage], pVM, PGMPAGETYPE_MMIO);
2165 Assert(PGM_PAGE_GET_TYPE(&pNew->aPages[0]) == PGMPAGETYPE_MMIO);
2166
2167 /* update the page count stats. */
2168 pVM->pgm.s.cPureMmioPages += cPages;
2169 pVM->pgm.s.cAllPages += cPages;
2170
2171 /* link it */
2172 pgmR3PhysLinkRamRange(pVM, pNew, pRamPrev);
2173 }
2174
2175 /*
2176 * Register the access handler.
2177 */
2178 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_MMIO, GCPhys, GCPhysLast,
2179 pfnHandlerR3, pvUserR3,
2180 pfnHandlerR0, pvUserR0,
2181 pfnHandlerRC, pvUserRC, pszDesc);
2182 if ( RT_FAILURE(rc)
2183 && !fRamExists)
2184 {
2185 pVM->pgm.s.cPureMmioPages -= cb >> PAGE_SHIFT;
2186 pVM->pgm.s.cAllPages -= cb >> PAGE_SHIFT;
2187
2188 /* remove the ad hoc range. */
2189 pgmR3PhysUnlinkRamRange2(pVM, pNew, pRamPrev);
2190 pNew->cb = pNew->GCPhys = pNew->GCPhysLast = NIL_RTGCPHYS;
 2191         MMHyperFree(pVM, pNew);
2192 }
2193 pgmPhysInvalidatePageMapTLB(pVM);
2194
2195 pgmUnlock(pVM);
2196 return rc;
2197}
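
/*
 * A minimal usage sketch for the MMIO register/deregister pair above, kept as
 * a comment (not compiled).  All 'sample' names are hypothetical; in reality
 * IOM passes its IOMR3MMIOHandler/IOMMMIOHandler callbacks and per-range user
 * data for all three contexts, and both calls must be made on the EMT.
 *
 *   static DECLCALLBACK(int) sampleMmioHandlerR3(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf,
 *                                                size_t cbBuf, PGMACCESSTYPE enmAccessType, void *pvUser)
 *   {
 *       NOREF(pVM); NOREF(GCPhys); NOREF(pvPhys); NOREF(pvBuf); NOREF(cbBuf); NOREF(enmAccessType); NOREF(pvUser);
 *       return VINF_PGM_HANDLER_DO_DEFAULT;     // sketch only: let PGM carry out the access itself
 *   }
 *
 *   static int sampleRegisterAndDeregisterMmio(PVM pVM,
 *                                              R0PTRTYPE(PFNPGMR0PHYSHANDLER) pfnHandlerR0, RTR0PTR pvUserR0,
 *                                              RCPTRTYPE(PFNPGMRCPHYSHANDLER) pfnHandlerRC, RTRCPTR pvUserRC)
 *   {
 *       RTGCPHYS const GCPhys = UINT32_C(0xe0000000);   // hypothetical, page aligned
 *       RTGCPHYS const cb     = 16 * PAGE_SIZE;         // page aligned
 *       int rc = PGMR3PhysMMIORegister(pVM, GCPhys, cb,
 *                                      sampleMmioHandlerR3, NULL,      // ring-3 handler + user argument
 *                                      pfnHandlerR0, pvUserR0,         // resolved by the caller, IOM style
 *                                      pfnHandlerRC, pvUserRC,
 *                                      "Sample MMIO");
 *       if (RT_SUCCESS(rc))
 *           rc = PGMR3PhysMMIODeregister(pVM, GCPhys, cb);
 *       return rc;
 *   }
 */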
2198
2199
2200/**
 2201 * This is the interface IOM is using to deregister an MMIO region.
2202 *
2203 * It will take care of calling PGMHandlerPhysicalDeregister and clean up
2204 * any ad hoc PGMRAMRANGE left behind.
2205 *
2206 * @returns VBox status code.
2207 * @param pVM Pointer to the shared VM structure.
2208 * @param GCPhys The start of the MMIO region.
2209 * @param cb The size of the MMIO region.
2210 */
2211VMMR3DECL(int) PGMR3PhysMMIODeregister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb)
2212{
2213 VM_ASSERT_EMT(pVM);
2214
2215 int rc = pgmLock(pVM);
2216 AssertRCReturn(rc, rc);
2217
2218 /*
2219 * First deregister the handler, then check if we should remove the ram range.
2220 */
2221 rc = PGMHandlerPhysicalDeregister(pVM, GCPhys);
2222 if (RT_SUCCESS(rc))
2223 {
2224 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
2225 PPGMRAMRANGE pRamPrev = NULL;
2226 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2227 while (pRam && GCPhysLast >= pRam->GCPhys)
2228 {
2229 /** @todo We're being a bit too careful here. rewrite. */
2230 if ( GCPhysLast == pRam->GCPhysLast
2231 && GCPhys == pRam->GCPhys)
2232 {
2233 Assert(pRam->cb == cb);
2234
2235 /*
2236 * See if all the pages are dead MMIO pages.
2237 */
2238 uint32_t const cPages = cb >> PAGE_SHIFT;
2239 bool fAllMMIO = true;
2240 uint32_t iPage = 0;
2241 uint32_t cLeft = cPages;
2242 while (cLeft-- > 0)
2243 {
2244 PPGMPAGE pPage = &pRam->aPages[iPage];
2245 if ( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_MMIO
2246 /*|| not-out-of-action later */)
2247 {
2248 fAllMMIO = false;
2249 Assert(PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_MMIO2_ALIAS_MMIO);
2250 AssertMsgFailed(("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2251 break;
2252 }
2253 Assert(PGM_PAGE_IS_ZERO(pPage));
2254 pPage++;
2255 }
2256 if (fAllMMIO)
2257 {
2258 /*
2259 * Ad-hoc range, unlink and free it.
2260 */
2261 Log(("PGMR3PhysMMIODeregister: Freeing ad hoc MMIO range for %RGp-%RGp %s\n",
2262 GCPhys, GCPhysLast, pRam->pszDesc));
2263
2264 pVM->pgm.s.cAllPages -= cPages;
2265 pVM->pgm.s.cPureMmioPages -= cPages;
2266
2267 pgmR3PhysUnlinkRamRange2(pVM, pRam, pRamPrev);
2268 pRam->cb = pRam->GCPhys = pRam->GCPhysLast = NIL_RTGCPHYS;
2269 MMHyperFree(pVM, pRam);
2270 break;
2271 }
2272 }
2273
2274 /*
2275 * Range match? It will all be within one range (see PGMAllHandler.cpp).
2276 */
2277 if ( GCPhysLast >= pRam->GCPhys
2278 && GCPhys <= pRam->GCPhysLast)
2279 {
2280 Assert(GCPhys >= pRam->GCPhys);
2281 Assert(GCPhysLast <= pRam->GCPhysLast);
2282
2283 /*
2284 * Turn the pages back into RAM pages.
2285 */
2286 uint32_t iPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2287 uint32_t cLeft = cb >> PAGE_SHIFT;
2288 while (cLeft--)
2289 {
2290 PPGMPAGE pPage = &pRam->aPages[iPage];
2291 AssertMsg(PGM_PAGE_IS_MMIO(pPage), ("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2292 AssertMsg(PGM_PAGE_IS_ZERO(pPage), ("%RGp %R[pgmpage]\n", pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), pPage));
2293 if (PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_MMIO)
2294 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_RAM);
2295 }
2296 break;
2297 }
2298
2299 /* next */
2300 pRamPrev = pRam;
2301 pRam = pRam->pNextR3;
2302 }
2303 }
2304
2305 /* Force a PGM pool flush as guest ram references have been changed. */
 2306     /** @todo not entirely SMP safe; assuming for now the guest takes care of this internally (not touching mapped MMIO while changing the mapping). */
2307 PVMCPU pVCpu = VMMGetCpu(pVM);
2308 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2309 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2310
2311 pgmPhysInvalidatePageMapTLB(pVM);
2312 pgmPhysInvalidRamRangeTlbs(pVM);
2313 pgmUnlock(pVM);
2314 return rc;
2315}
2316
2317
2318/**
2319 * Locate a MMIO2 range.
2320 *
2321 * @returns Pointer to the MMIO2 range.
2322 * @param pVM Pointer to the shared VM structure.
2323 * @param pDevIns The device instance owning the region.
2324 * @param iRegion The region.
2325 */
2326DECLINLINE(PPGMMMIO2RANGE) pgmR3PhysMMIO2Find(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion)
2327{
2328 /*
2329 * Search the list.
2330 */
2331 for (PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3; pCur; pCur = pCur->pNextR3)
2332 if ( pCur->pDevInsR3 == pDevIns
2333 && pCur->iRegion == iRegion)
2334 return pCur;
2335 return NULL;
2336}
2337
2338
2339/**
2340 * Allocate and register an MMIO2 region.
2341 *
2342 * As mentioned elsewhere, MMIO2 is just RAM spelled differently. It's RAM
2343 * associated with a device. It is also non-shared memory with a permanent
2344 * ring-3 mapping and page backing (presently).
2345 *
 2346 * An MMIO2 range may overlap with base memory if a lot of RAM is configured for
2347 * the VM, in which case we'll drop the base memory pages. Presently we will
2348 * make no attempt to preserve anything that happens to be present in the base
 2349 * memory that is replaced; this is of course incorrect, but it's too much
2350 * effort.
2351 *
2352 * @returns VBox status code.
2353 * @retval VINF_SUCCESS on success, *ppv pointing to the R3 mapping of the
2354 * memory.
2355 * @retval VERR_ALREADY_EXISTS if the region already exists.
2356 *
2357 * @param pVM Pointer to the shared VM structure.
2358 * @param pDevIns The device instance owning the region.
2359 * @param iRegion The region number. If the MMIO2 memory is a PCI
2360 * I/O region this number has to be the number of that
 2361 *                      region. Otherwise it can be any number up to
2362 * UINT8_MAX.
2363 * @param cb The size of the region. Must be page aligned.
2364 * @param fFlags Reserved for future use, must be zero.
2365 * @param ppv Where to store the pointer to the ring-3 mapping of
2366 * the memory.
2367 * @param pszDesc The description.
2368 */
2369VMMR3DECL(int) PGMR3PhysMMIO2Register(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS cb, uint32_t fFlags, void **ppv, const char *pszDesc)
2370{
2371 /*
2372 * Validate input.
2373 */
2374 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2375 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2376 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2377 AssertPtrReturn(ppv, VERR_INVALID_POINTER);
2378 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2379 AssertReturn(*pszDesc, VERR_INVALID_PARAMETER);
2380 AssertReturn(pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion) == NULL, VERR_ALREADY_EXISTS);
2381 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2382 AssertReturn(cb, VERR_INVALID_PARAMETER);
2383 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
2384
2385 const uint32_t cPages = cb >> PAGE_SHIFT;
2386 AssertLogRelReturn(((RTGCPHYS)cPages << PAGE_SHIFT) == cb, VERR_INVALID_PARAMETER);
2387 AssertLogRelReturn(cPages <= INT32_MAX / 2, VERR_NO_MEMORY);
2388
2389 /*
2390 * For the 2nd+ instance, mangle the description string so it's unique.
2391 */
2392 if (pDevIns->iInstance > 0) /** @todo Move to PDMDevHlp.cpp and use a real string cache. */
2393 {
2394 pszDesc = MMR3HeapAPrintf(pVM, MM_TAG_PGM_PHYS, "%s [%u]", pszDesc, pDevIns->iInstance);
2395 if (!pszDesc)
2396 return VERR_NO_MEMORY;
2397 }
2398
2399 /*
2400 * Try reserve and allocate the backing memory first as this is what is
2401 * most likely to fail.
2402 */
2403 int rc = MMR3AdjustFixedReservation(pVM, cPages, pszDesc);
2404 if (RT_SUCCESS(rc))
2405 {
2406 void *pvPages;
2407 PSUPPAGE paPages = (PSUPPAGE)RTMemTmpAlloc(cPages * sizeof(SUPPAGE));
 2408         rc = paPages ? SUPR3PageAllocEx(cPages, 0 /*fFlags*/, &pvPages, NULL /*pR0Ptr*/, paPages)
 2409                      : VERR_NO_TMP_MEMORY;
2410 if (RT_SUCCESS(rc))
2411 {
2412 memset(pvPages, 0, cPages * PAGE_SIZE);
2413
2414 /*
2415 * Create the MMIO2 range record for it.
2416 */
2417 const size_t cbRange = RT_OFFSETOF(PGMMMIO2RANGE, RamRange.aPages[cPages]);
2418 PPGMMMIO2RANGE pNew;
2419 rc = MMR3HyperAllocOnceNoRel(pVM, cbRange, 0, MM_TAG_PGM_PHYS, (void **)&pNew);
 2420             AssertLogRelMsgRC(rc, ("cbRange=%zu\n", cbRange));
2421 if (RT_SUCCESS(rc))
2422 {
2423 pNew->pDevInsR3 = pDevIns;
2424 pNew->pvR3 = pvPages;
2425 //pNew->pNext = NULL;
2426 //pNew->fMapped = false;
2427 //pNew->fOverlapping = false;
2428 pNew->iRegion = iRegion;
2429 pNew->idSavedState = UINT8_MAX;
2430 pNew->RamRange.pSelfR0 = MMHyperCCToR0(pVM, &pNew->RamRange);
2431 pNew->RamRange.pSelfRC = MMHyperCCToRC(pVM, &pNew->RamRange);
2432 pNew->RamRange.GCPhys = NIL_RTGCPHYS;
2433 pNew->RamRange.GCPhysLast = NIL_RTGCPHYS;
2434 pNew->RamRange.pszDesc = pszDesc;
2435 pNew->RamRange.cb = cb;
2436 pNew->RamRange.fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_MMIO2;
2437 pNew->RamRange.pvR3 = pvPages;
2438 //pNew->RamRange.paLSPages = NULL;
2439
2440 uint32_t iPage = cPages;
2441 while (iPage-- > 0)
2442 {
2443 PGM_PAGE_INIT(&pNew->RamRange.aPages[iPage],
2444 paPages[iPage].Phys, NIL_GMM_PAGEID,
2445 PGMPAGETYPE_MMIO2, PGM_PAGE_STATE_ALLOCATED);
2446 }
2447
2448 /* update page count stats */
2449 pVM->pgm.s.cAllPages += cPages;
2450 pVM->pgm.s.cPrivatePages += cPages;
2451
2452 /*
2453 * Link it into the list.
2454 * Since there is no particular order, just push it.
2455 */
2456 pgmLock(pVM);
2457 pNew->pNextR3 = pVM->pgm.s.pMmio2RangesR3;
2458 pVM->pgm.s.pMmio2RangesR3 = pNew;
2459 pgmUnlock(pVM);
2460
2461 *ppv = pvPages;
2462 RTMemTmpFree(paPages);
2463 pgmPhysInvalidatePageMapTLB(pVM);
2464 return VINF_SUCCESS;
2465 }
2466
2467 SUPR3PageFreeEx(pvPages, cPages);
2468 }
2469 RTMemTmpFree(paPages);
2470 MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pszDesc);
2471 }
2472 if (pDevIns->iInstance > 0)
2473 MMR3HeapFree((void *)pszDesc);
2474 return rc;
2475}
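
/*
 * A minimal usage sketch for PGMR3PhysMMIO2Register, kept as a comment (not
 * compiled).  The 'sample' names, the region number and the 2 MB size are made
 * up; real devices normally go through the PDM device helpers rather than
 * calling PGM directly, and the call must be made on the EMT during device
 * construction.
 *
 *   static int sampleDevAllocMmio2(PVM pVM, PPDMDEVINS pDevIns)
 *   {
 *       uint32_t const iRegion = 0;             // must match the PCI region number if it is one
 *       void          *pvMmio2R3;
 *       int rc = PGMR3PhysMMIO2Register(pVM, pDevIns, iRegion,
 *                                       2 * _1M,            // cb, page aligned
 *                                       0,                  // fFlags, must be zero
 *                                       &pvMmio2R3, "Sample framebuffer");
 *       if (RT_SUCCESS(rc))
 *       {
 *           // The region is backed and zeroed but not yet visible to the
 *           // guest; that happens later via PGMR3PhysMMIO2Map.  pvMmio2R3 is
 *           // the permanent ring-3 mapping and can be used right away.
 *           memset(pvMmio2R3, 0xff, PAGE_SIZE);
 *       }
 *       return rc;
 *   }
 *
 * On destruction, everything the device registered can be dropped with the
 * wildcard form: PGMR3PhysMMIO2Deregister(pVM, pDevIns, UINT32_MAX).
 */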
2476
2477
2478/**
2479 * Deregisters and frees an MMIO2 region.
2480 *
2481 * Any physical (and virtual) access handlers registered for the region must
2482 * be deregistered before calling this function.
2483 *
2484 * @returns VBox status code.
2485 * @param pVM Pointer to the shared VM structure.
2486 * @param pDevIns The device instance owning the region.
2487 * @param iRegion The region. If it's UINT32_MAX it'll be a wildcard match.
2488 */
2489VMMR3DECL(int) PGMR3PhysMMIO2Deregister(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion)
2490{
2491 /*
2492 * Validate input.
2493 */
2494 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2495 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2496 AssertReturn(iRegion <= UINT8_MAX || iRegion == UINT32_MAX, VERR_INVALID_PARAMETER);
2497
2498 pgmLock(pVM);
2499 int rc = VINF_SUCCESS;
2500 unsigned cFound = 0;
2501 PPGMMMIO2RANGE pPrev = NULL;
2502 PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3;
2503 while (pCur)
2504 {
2505 if ( pCur->pDevInsR3 == pDevIns
2506 && ( iRegion == UINT32_MAX
2507 || pCur->iRegion == iRegion))
2508 {
2509 cFound++;
2510
2511 /*
2512 * Unmap it if it's mapped.
2513 */
2514 if (pCur->fMapped)
2515 {
2516 int rc2 = PGMR3PhysMMIO2Unmap(pVM, pCur->pDevInsR3, pCur->iRegion, pCur->RamRange.GCPhys);
2517 AssertRC(rc2);
2518 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2519 rc = rc2;
2520 }
2521
2522 /*
2523 * Unlink it
2524 */
2525 PPGMMMIO2RANGE pNext = pCur->pNextR3;
2526 if (pPrev)
2527 pPrev->pNextR3 = pNext;
2528 else
2529 pVM->pgm.s.pMmio2RangesR3 = pNext;
2530 pCur->pNextR3 = NULL;
2531
2532 /*
2533 * Free the memory.
2534 */
2535 int rc2 = SUPR3PageFreeEx(pCur->pvR3, pCur->RamRange.cb >> PAGE_SHIFT);
2536 AssertRC(rc2);
2537 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2538 rc = rc2;
2539
2540 uint32_t const cPages = pCur->RamRange.cb >> PAGE_SHIFT;
2541 rc2 = MMR3AdjustFixedReservation(pVM, -(int32_t)cPages, pCur->RamRange.pszDesc);
2542 AssertRC(rc2);
2543 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
2544 rc = rc2;
2545
2546 /* we're leaking hyper memory here if done at runtime. */
2547#ifdef VBOX_STRICT
2548 VMSTATE const enmState = VMR3GetState(pVM);
2549 AssertMsg( enmState == VMSTATE_POWERING_OFF
2550 || enmState == VMSTATE_POWERING_OFF_LS
2551 || enmState == VMSTATE_OFF
2552 || enmState == VMSTATE_OFF_LS
2553 || enmState == VMSTATE_DESTROYING
2554 || enmState == VMSTATE_TERMINATED
2555 || enmState == VMSTATE_CREATING
2556 , ("%s\n", VMR3GetStateName(enmState)));
2557#endif
2558 /*rc = MMHyperFree(pVM, pCur);
2559 AssertRCReturn(rc, rc); - not safe, see the alloc call. */
2560
2561
2562 /* update page count stats */
2563 pVM->pgm.s.cAllPages -= cPages;
2564 pVM->pgm.s.cPrivatePages -= cPages;
2565
2566 /* next */
2567 pCur = pNext;
2568 }
2569 else
2570 {
2571 pPrev = pCur;
2572 pCur = pCur->pNextR3;
2573 }
2574 }
2575 pgmPhysInvalidatePageMapTLB(pVM);
2576 pgmUnlock(pVM);
2577 return !cFound && iRegion != UINT32_MAX ? VERR_NOT_FOUND : rc;
2578}
2579
2580
2581/**
2582 * Maps a MMIO2 region.
2583 *
2584 * This is done when a guest / the bios / state loading changes the
2585 * PCI config. The replacing of base memory has the same restrictions
2586 * as during registration, of course.
2587 *
2588 * @returns VBox status code.
2589 *
2590 * @param pVM Pointer to the shared VM structure.
 2591 * @param   pDevIns         The device instance owning the region.
 * @param   iRegion         The region.
 * @param   GCPhys          The guest physical base address to map the region at.
 *                          Must be page aligned.
2592 */
2593VMMR3DECL(int) PGMR3PhysMMIO2Map(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS GCPhys)
2594{
2595 /*
2596 * Validate input
2597 */
2598 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2599 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2600 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2601 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
2602 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
2603 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2604
2605 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2606 AssertReturn(pCur, VERR_NOT_FOUND);
2607 AssertReturn(!pCur->fMapped, VERR_WRONG_ORDER);
2608 Assert(pCur->RamRange.GCPhys == NIL_RTGCPHYS);
2609 Assert(pCur->RamRange.GCPhysLast == NIL_RTGCPHYS);
2610
2611 const RTGCPHYS GCPhysLast = GCPhys + pCur->RamRange.cb - 1;
2612 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2613
2614 /*
2615 * Find our location in the ram range list, checking for
 2616     * restrictions we don't bother implementing yet (partial overlaps).
2617 */
2618 bool fRamExists = false;
2619 PPGMRAMRANGE pRamPrev = NULL;
2620 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2621 while (pRam && GCPhysLast >= pRam->GCPhys)
2622 {
2623 if ( GCPhys <= pRam->GCPhysLast
2624 && GCPhysLast >= pRam->GCPhys)
2625 {
2626 /* completely within? */
2627 AssertLogRelMsgReturn( GCPhys >= pRam->GCPhys
2628 && GCPhysLast <= pRam->GCPhysLast,
2629 ("%RGp-%RGp (MMIO2/%s) falls partly outside %RGp-%RGp (%s)\n",
2630 GCPhys, GCPhysLast, pCur->RamRange.pszDesc,
2631 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
2632 VERR_PGM_RAM_CONFLICT);
2633 fRamExists = true;
2634 break;
2635 }
2636
2637 /* next */
2638 pRamPrev = pRam;
2639 pRam = pRam->pNextR3;
2640 }
2641 if (fRamExists)
2642 {
2643 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2644 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2645 while (cPagesLeft-- > 0)
2646 {
2647 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
2648 ("%RGp isn't a RAM page (%d) - mapping %RGp-%RGp (MMIO2/%s).\n",
2649 GCPhys, PGM_PAGE_GET_TYPE(pPage), GCPhys, GCPhysLast, pCur->RamRange.pszDesc),
2650 VERR_PGM_RAM_CONFLICT);
2651 pPage++;
2652 }
2653 }
2654 Log(("PGMR3PhysMMIO2Map: %RGp-%RGp fRamExists=%RTbool %s\n",
2655 GCPhys, GCPhysLast, fRamExists, pCur->RamRange.pszDesc));
2656
2657 /*
2658 * Make the changes.
2659 */
2660 pgmLock(pVM);
2661
2662 pCur->RamRange.GCPhys = GCPhys;
2663 pCur->RamRange.GCPhysLast = GCPhysLast;
2664 pCur->fMapped = true;
2665 pCur->fOverlapping = fRamExists;
2666
2667 if (fRamExists)
2668 {
2669/** @todo use pgmR3PhysFreePageRange here. */
2670 uint32_t cPendingPages = 0;
2671 PGMMFREEPAGESREQ pReq;
2672 int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
2673 AssertLogRelRCReturn(rc, rc);
2674
2675 /* replace the pages, freeing all present RAM pages. */
2676 PPGMPAGE pPageSrc = &pCur->RamRange.aPages[0];
2677 PPGMPAGE pPageDst = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2678 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2679 while (cPagesLeft-- > 0)
2680 {
2681 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, pPageDst, GCPhys);
2682 AssertLogRelRCReturn(rc, rc); /* We're done for if this goes wrong. */
2683
2684 RTHCPHYS const HCPhys = PGM_PAGE_GET_HCPHYS(pPageSrc);
2685 PGM_PAGE_SET_HCPHYS(pVM, pPageDst, HCPhys);
2686 PGM_PAGE_SET_TYPE(pVM, pPageDst, PGMPAGETYPE_MMIO2);
2687 PGM_PAGE_SET_STATE(pVM, pPageDst, PGM_PAGE_STATE_ALLOCATED);
2688 PGM_PAGE_SET_PDE_TYPE(pVM, pPageDst, PGM_PAGE_PDE_TYPE_DONTCARE);
2689 PGM_PAGE_SET_PTE_INDEX(pVM, pPageDst, 0);
2690 PGM_PAGE_SET_TRACKING(pVM, pPageDst, 0);
2691
2692 pVM->pgm.s.cZeroPages--;
2693 GCPhys += PAGE_SIZE;
2694 pPageSrc++;
2695 pPageDst++;
2696 }
2697
2698 /* Flush physical page map TLB. */
2699 pgmPhysInvalidatePageMapTLB(pVM);
2700
2701 if (cPendingPages)
2702 {
2703 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
2704 AssertLogRelRCReturn(rc, rc);
2705 }
2706 GMMR3FreePagesCleanup(pReq);
2707
2708 /* Force a PGM pool flush as guest ram references have been changed. */
 2709         /** @todo not entirely SMP safe; assuming for now the guest takes care of this internally (not touching mapped MMIO while changing the mapping). */
2710 PVMCPU pVCpu = VMMGetCpu(pVM);
2711 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2712 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2713
2714 pgmUnlock(pVM);
2715 }
2716 else
2717 {
2718 RTGCPHYS cb = pCur->RamRange.cb;
2719
2720 /* Clear the tracking data of pages we're going to reactivate. */
2721 PPGMPAGE pPageSrc = &pCur->RamRange.aPages[0];
2722 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2723 while (cPagesLeft-- > 0)
2724 {
2725 PGM_PAGE_SET_TRACKING(pVM, pPageSrc, 0);
2726 PGM_PAGE_SET_PTE_INDEX(pVM, pPageSrc, 0);
2727 pPageSrc++;
2728 }
2729
2730 /* link in the ram range */
2731 pgmR3PhysLinkRamRange(pVM, &pCur->RamRange, pRamPrev);
2732 pgmUnlock(pVM);
2733
2734 REMR3NotifyPhysRamRegister(pVM, GCPhys, cb, REM_NOTIFY_PHYS_RAM_FLAGS_MMIO2);
2735 }
2736
2737 pgmPhysInvalidatePageMapTLB(pVM);
2738 return VINF_SUCCESS;
2739}
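
/*
 * A minimal usage sketch for the MMIO2 map/unmap pair, kept as a comment (not
 * compiled).  It mirrors what a PCI BAR (re)mapping notification would do for
 * a previously registered region; the 'sample' name and region number are
 * hypothetical, and both addresses must be page aligned.
 *
 *   static int sampleMoveMmio2(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhysOld, RTGCPHYS GCPhysNew)
 *   {
 *       uint32_t const iRegion = 0;
 *       int rc = VINF_SUCCESS;
 *       if (GCPhysOld != NIL_RTGCPHYS)                      // currently mapped -> unmap it first
 *           rc = PGMR3PhysMMIO2Unmap(pVM, pDevIns, iRegion, GCPhysOld);
 *       if (RT_SUCCESS(rc) && GCPhysNew != NIL_RTGCPHYS)    // map it at the new location
 *           rc = PGMR3PhysMMIO2Map(pVM, pDevIns, iRegion, GCPhysNew);
 *       return rc;
 *   }
 */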
2740
2741
2742/**
2743 * Unmaps a MMIO2 region.
2744 *
2745 * This is done when a guest / the bios / state loading changes the
2746 * PCI config. The replacing of base memory has the same restrictions
2747 * as during registration, of course.
2748 */
2749VMMR3DECL(int) PGMR3PhysMMIO2Unmap(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS GCPhys)
2750{
2751 /*
2752 * Validate input
2753 */
2754 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2755 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2756 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2757 AssertReturn(GCPhys != NIL_RTGCPHYS, VERR_INVALID_PARAMETER);
2758 AssertReturn(GCPhys != 0, VERR_INVALID_PARAMETER);
2759 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
2760
2761 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2762 AssertReturn(pCur, VERR_NOT_FOUND);
2763 AssertReturn(pCur->fMapped, VERR_WRONG_ORDER);
2764 AssertReturn(pCur->RamRange.GCPhys == GCPhys, VERR_INVALID_PARAMETER);
2765 Assert(pCur->RamRange.GCPhysLast != NIL_RTGCPHYS);
2766
2767 Log(("PGMR3PhysMMIO2Unmap: %RGp-%RGp %s\n",
2768 pCur->RamRange.GCPhys, pCur->RamRange.GCPhysLast, pCur->RamRange.pszDesc));
2769
2770 /*
2771 * Unmap it.
2772 */
2773 pgmLock(pVM);
2774
2775 RTGCPHYS GCPhysRangeREM;
2776 RTGCPHYS cbRangeREM;
2777 bool fInformREM;
2778 if (pCur->fOverlapping)
2779 {
2780 /* Restore the RAM pages we've replaced. */
2781 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
2782 while (pRam->GCPhys > pCur->RamRange.GCPhysLast)
2783 pRam = pRam->pNextR3;
2784
2785 PPGMPAGE pPageDst = &pRam->aPages[(pCur->RamRange.GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
2786 uint32_t cPagesLeft = pCur->RamRange.cb >> PAGE_SHIFT;
2787 while (cPagesLeft-- > 0)
2788 {
2789 PGM_PAGE_INIT_ZERO(pPageDst, pVM, PGMPAGETYPE_RAM);
2790 pVM->pgm.s.cZeroPages++;
2791 pPageDst++;
2792 }
2793
2794 /* Flush physical page map TLB. */
2795 pgmPhysInvalidatePageMapTLB(pVM);
2796
2797 GCPhysRangeREM = NIL_RTGCPHYS; /* shuts up gcc */
2798 cbRangeREM = RTGCPHYS_MAX; /* ditto */
2799 fInformREM = false;
2800 }
2801 else
2802 {
2803 GCPhysRangeREM = pCur->RamRange.GCPhys;
2804 cbRangeREM = pCur->RamRange.cb;
2805 fInformREM = true;
2806
2807 pgmR3PhysUnlinkRamRange(pVM, &pCur->RamRange);
2808 }
2809
2810 pCur->RamRange.GCPhys = NIL_RTGCPHYS;
2811 pCur->RamRange.GCPhysLast = NIL_RTGCPHYS;
2812 pCur->fOverlapping = false;
2813 pCur->fMapped = false;
2814
2815 /* Force a PGM pool flush as guest ram references have been changed. */
 2816     /** @todo not entirely SMP safe; assuming for now the guest takes care of this internally (not touching mapped MMIO while changing the mapping). */
2817 PVMCPU pVCpu = VMMGetCpu(pVM);
2818 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
2819 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2820
2821 pgmPhysInvalidatePageMapTLB(pVM);
2822 pgmPhysInvalidRamRangeTlbs(pVM);
2823 pgmUnlock(pVM);
2824
2825 if (fInformREM)
2826 REMR3NotifyPhysRamDeregister(pVM, GCPhysRangeREM, cbRangeREM);
2827
2828 return VINF_SUCCESS;
2829}
2830
2831
2832/**
2833 * Checks if the given address is an MMIO2 base address or not.
2834 *
2835 * @returns true/false accordingly.
2836 * @param pVM Pointer to the shared VM structure.
2837 * @param pDevIns The owner of the memory, optional.
2838 * @param GCPhys The address to check.
2839 */
2840VMMR3DECL(bool) PGMR3PhysMMIO2IsBase(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys)
2841{
2842 /*
2843 * Validate input
2844 */
2845 VM_ASSERT_EMT_RETURN(pVM, false);
2846 AssertPtrReturn(pDevIns, false);
2847 AssertReturn(GCPhys != NIL_RTGCPHYS, false);
2848 AssertReturn(GCPhys != 0, false);
2849 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), false);
2850
2851 /*
2852 * Search the list.
2853 */
2854 pgmLock(pVM);
2855 for (PPGMMMIO2RANGE pCur = pVM->pgm.s.pMmio2RangesR3; pCur; pCur = pCur->pNextR3)
2856 if (pCur->RamRange.GCPhys == GCPhys)
2857 {
2858 Assert(pCur->fMapped);
2859 pgmUnlock(pVM);
2860 return true;
2861 }
2862 pgmUnlock(pVM);
2863 return false;
2864}
2865
2866
2867/**
2868 * Gets the HC physical address of a page in the MMIO2 region.
2869 *
 2870 * This API is intended for MMHyper and shouldn't be called
2871 * by anyone else...
2872 *
2873 * @returns VBox status code.
2874 * @param pVM Pointer to the shared VM structure.
2875 * @param pDevIns The owner of the memory, optional.
2876 * @param iRegion The region.
 2877 * @param   off             The page expressed as an offset into the MMIO2 region.
2878 * @param pHCPhys Where to store the result.
2879 */
2880VMMR3DECL(int) PGMR3PhysMMIO2GetHCPhys(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS off, PRTHCPHYS pHCPhys)
2881{
2882 /*
2883 * Validate input
2884 */
2885 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2886 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2887 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2888
2889 pgmLock(pVM);
2890 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2891 AssertReturn(pCur, VERR_NOT_FOUND);
2892 AssertReturn(off < pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2893
2894 PCPGMPAGE pPage = &pCur->RamRange.aPages[off >> PAGE_SHIFT];
2895 *pHCPhys = PGM_PAGE_GET_HCPHYS(pPage);
2896 pgmUnlock(pVM);
2897 return VINF_SUCCESS;
2898}
2899
2900
2901/**
2902 * Maps a portion of an MMIO2 region into kernel space (host).
2903 *
2904 * The kernel mapping will become invalid when the MMIO2 memory is deregistered
2905 * or the VM is terminated.
2906 *
2907 * @return VBox status code.
2908 *
2909 * @param pVM Pointer to the shared VM structure.
2910 * @param pDevIns The device owning the MMIO2 memory.
2911 * @param iRegion The region.
2912 * @param off The offset into the region. Must be page aligned.
2913 * @param cb The number of bytes to map. Must be page aligned.
2914 * @param pszDesc Mapping description.
2915 * @param pR0Ptr Where to store the R0 address.
2916 */
2917VMMR3DECL(int) PGMR3PhysMMIO2MapKernel(PVM pVM, PPDMDEVINS pDevIns, uint32_t iRegion, RTGCPHYS off, RTGCPHYS cb,
2918 const char *pszDesc, PRTR0PTR pR0Ptr)
2919{
2920 /*
2921 * Validate input.
2922 */
2923 VM_ASSERT_EMT_RETURN(pVM, VERR_VM_THREAD_NOT_EMT);
2924 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2925 AssertReturn(iRegion <= UINT8_MAX, VERR_INVALID_PARAMETER);
2926
2927 PPGMMMIO2RANGE pCur = pgmR3PhysMMIO2Find(pVM, pDevIns, iRegion);
2928 AssertReturn(pCur, VERR_NOT_FOUND);
2929 AssertReturn(off < pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2930 AssertReturn(cb <= pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2931 AssertReturn(off + cb <= pCur->RamRange.cb, VERR_INVALID_PARAMETER);
2932
2933 /*
2934 * Pass the request on to the support library/driver.
2935 */
2936 int rc = SUPR3PageMapKernel(pCur->pvR3, off, cb, 0, pR0Ptr);
2937
2938 return rc;
2939}
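
/*
 * A minimal usage sketch, kept as a comment (not compiled): giving a ring-0
 * device part direct access to the first 64 KB of an MMIO2 region.  The
 * region number and description are hypothetical; the offset and size must be
 * page aligned, and the resulting mapping stays valid until the MMIO2 memory
 * is deregistered or the VM is terminated.
 *
 *   static int sampleMapFirst64KIntoR0(PVM pVM, PPDMDEVINS pDevIns, PRTR0PTR pR0Ptr)
 *   {
 *       uint32_t const iRegion = 0;
 *       RTGCPHYS const off     = 0;
 *       return PGMR3PhysMMIO2MapKernel(pVM, pDevIns, iRegion, off, _64K,
 *                                      "Sample framebuffer (R0)", pR0Ptr);
 *   }
 */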
2940
2941
2942/**
2943 * Worker for PGMR3PhysRomRegister.
2944 *
2945 * This is here to simplify lock management, i.e. the caller does all the
2946 * locking and we can simply return without needing to remember to unlock
2947 * anything first.
2948 *
2949 * @returns VBox status.
2950 * @param pVM VM Handle.
2951 * @param pDevIns The device instance owning the ROM.
2952 * @param GCPhys First physical address in the range.
2953 * Must be page aligned!
2954 * @param cb The size of the range (in bytes).
2955 * Must be page aligned!
2956 * @param pvBinary Pointer to the binary data backing the ROM image.
2957 * @param cbBinary The size of the binary data pvBinary points to.
2958 * This must be less or equal to @a cb.
2959 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
2960 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
2961 * @param pszDesc Pointer to description string. This must not be freed.
2962 */
2963static int pgmR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
2964 const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc)
2965{
2966 /*
2967 * Validate input.
2968 */
2969 AssertPtrReturn(pDevIns, VERR_INVALID_PARAMETER);
2970 AssertReturn(RT_ALIGN_T(GCPhys, PAGE_SIZE, RTGCPHYS) == GCPhys, VERR_INVALID_PARAMETER);
2971 AssertReturn(RT_ALIGN_T(cb, PAGE_SIZE, RTGCPHYS) == cb, VERR_INVALID_PARAMETER);
2972 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
2973 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
2974 AssertPtrReturn(pvBinary, VERR_INVALID_PARAMETER);
2975 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
2976 AssertReturn(!(fFlags & ~(PGMPHYS_ROM_FLAGS_SHADOWED | PGMPHYS_ROM_FLAGS_PERMANENT_BINARY)), VERR_INVALID_PARAMETER);
2977 VM_ASSERT_STATE_RETURN(pVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
2978
2979 const uint32_t cPages = cb >> PAGE_SHIFT;
2980
2981 /*
2982 * Find the ROM location in the ROM list first.
2983 */
2984 PPGMROMRANGE pRomPrev = NULL;
2985 PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3;
2986 while (pRom && GCPhysLast >= pRom->GCPhys)
2987 {
2988 if ( GCPhys <= pRom->GCPhysLast
2989 && GCPhysLast >= pRom->GCPhys)
2990 AssertLogRelMsgFailedReturn(("%RGp-%RGp (%s) conflicts with existing %RGp-%RGp (%s)\n",
2991 GCPhys, GCPhysLast, pszDesc,
2992 pRom->GCPhys, pRom->GCPhysLast, pRom->pszDesc),
2993 VERR_PGM_RAM_CONFLICT);
2994 /* next */
2995 pRomPrev = pRom;
2996 pRom = pRom->pNextR3;
2997 }
2998
2999 /*
3000 * Find the RAM location and check for conflicts.
3001 *
3002 * Conflict detection is a bit different than for RAM
3003 * registration since a ROM can be located within a RAM
3004 * range. So, what we have to check for is other memory
3005 * types (other than RAM that is) and that we don't span
 3006 * more than one RAM range (lazy).
3007 */
3008 bool fRamExists = false;
3009 PPGMRAMRANGE pRamPrev = NULL;
3010 PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
3011 while (pRam && GCPhysLast >= pRam->GCPhys)
3012 {
3013 if ( GCPhys <= pRam->GCPhysLast
3014 && GCPhysLast >= pRam->GCPhys)
3015 {
3016 /* completely within? */
3017 AssertLogRelMsgReturn( GCPhys >= pRam->GCPhys
3018 && GCPhysLast <= pRam->GCPhysLast,
3019 ("%RGp-%RGp (%s) falls partly outside %RGp-%RGp (%s)\n",
3020 GCPhys, GCPhysLast, pszDesc,
3021 pRam->GCPhys, pRam->GCPhysLast, pRam->pszDesc),
3022 VERR_PGM_RAM_CONFLICT);
3023 fRamExists = true;
3024 break;
3025 }
3026
3027 /* next */
3028 pRamPrev = pRam;
3029 pRam = pRam->pNextR3;
3030 }
3031 if (fRamExists)
3032 {
3033 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
3034 uint32_t cPagesLeft = cPages;
3035 while (cPagesLeft-- > 0)
3036 {
3037 AssertLogRelMsgReturn(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM,
3038 ("%RGp (%R[pgmpage]) isn't a RAM page - registering %RGp-%RGp (%s).\n",
3039 pRam->GCPhys + ((RTGCPHYS)(uintptr_t)(pPage - &pRam->aPages[0]) << PAGE_SHIFT),
3040 pPage, GCPhys, GCPhysLast, pszDesc), VERR_PGM_RAM_CONFLICT);
3041 Assert(PGM_PAGE_IS_ZERO(pPage));
3042 pPage++;
3043 }
3044 }
3045
3046 /*
3047 * Update the base memory reservation if necessary.
3048 */
3049 uint32_t cExtraBaseCost = fRamExists ? 0 : cPages;
3050 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3051 cExtraBaseCost += cPages;
3052 if (cExtraBaseCost)
3053 {
3054 int rc = MMR3IncreaseBaseReservation(pVM, cExtraBaseCost);
3055 if (RT_FAILURE(rc))
3056 return rc;
3057 }
3058
3059 /*
3060 * Allocate memory for the virgin copy of the RAM.
3061 */
3062 PGMMALLOCATEPAGESREQ pReq;
3063 int rc = GMMR3AllocatePagesPrepare(pVM, &pReq, cPages, GMMACCOUNT_BASE);
3064 AssertRCReturn(rc, rc);
3065
3066 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3067 {
3068 pReq->aPages[iPage].HCPhysGCPhys = GCPhys + (iPage << PAGE_SHIFT);
3069 pReq->aPages[iPage].idPage = NIL_GMM_PAGEID;
3070 pReq->aPages[iPage].idSharedPage = NIL_GMM_PAGEID;
3071 }
3072
3073 rc = GMMR3AllocatePagesPerform(pVM, pReq);
3074 if (RT_FAILURE(rc))
3075 {
3076 GMMR3AllocatePagesCleanup(pReq);
3077 return rc;
3078 }
3079
3080 /*
3081 * Allocate the new ROM range and RAM range (if necessary).
3082 */
3083 PPGMROMRANGE pRomNew;
3084 rc = MMHyperAlloc(pVM, RT_OFFSETOF(PGMROMRANGE, aPages[cPages]), 0, MM_TAG_PGM_PHYS, (void **)&pRomNew);
3085 if (RT_SUCCESS(rc))
3086 {
3087 PPGMRAMRANGE pRamNew = NULL;
3088 if (!fRamExists)
3089 rc = MMHyperAlloc(pVM, RT_OFFSETOF(PGMRAMRANGE, aPages[cPages]), sizeof(PGMPAGE), MM_TAG_PGM_PHYS, (void **)&pRamNew);
3090 if (RT_SUCCESS(rc))
3091 {
3092 /*
3093 * Initialize and insert the RAM range (if required).
3094 */
3095 PPGMROMPAGE pRomPage = &pRomNew->aPages[0];
3096 if (!fRamExists)
3097 {
3098 pRamNew->pSelfR0 = MMHyperCCToR0(pVM, pRamNew);
3099 pRamNew->pSelfRC = MMHyperCCToRC(pVM, pRamNew);
3100 pRamNew->GCPhys = GCPhys;
3101 pRamNew->GCPhysLast = GCPhysLast;
3102 pRamNew->cb = cb;
3103 pRamNew->pszDesc = pszDesc;
3104 pRamNew->fFlags = PGM_RAM_RANGE_FLAGS_AD_HOC_ROM;
3105 pRamNew->pvR3 = NULL;
3106 pRamNew->paLSPages = NULL;
3107
3108 PPGMPAGE pPage = &pRamNew->aPages[0];
3109 for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++)
3110 {
3111 PGM_PAGE_INIT(pPage,
3112 pReq->aPages[iPage].HCPhysGCPhys,
3113 pReq->aPages[iPage].idPage,
3114 PGMPAGETYPE_ROM,
3115 PGM_PAGE_STATE_ALLOCATED);
3116
3117 pRomPage->Virgin = *pPage;
3118 }
3119
3120 pVM->pgm.s.cAllPages += cPages;
3121 pgmR3PhysLinkRamRange(pVM, pRamNew, pRamPrev);
3122 }
3123 else
3124 {
3125 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> PAGE_SHIFT];
3126 for (uint32_t iPage = 0; iPage < cPages; iPage++, pPage++, pRomPage++)
3127 {
3128 PGM_PAGE_SET_TYPE(pVM, pPage, PGMPAGETYPE_ROM);
3129 PGM_PAGE_SET_HCPHYS(pVM, pPage, pReq->aPages[iPage].HCPhysGCPhys);
3130 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED);
3131 PGM_PAGE_SET_PAGEID(pVM, pPage, pReq->aPages[iPage].idPage);
3132 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
3133 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
3134 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
3135
3136 pRomPage->Virgin = *pPage;
3137 }
3138
3139 pRamNew = pRam;
3140
3141 pVM->pgm.s.cZeroPages -= cPages;
3142 }
3143 pVM->pgm.s.cPrivatePages += cPages;
3144
3145 /* Flush physical page map TLB. */
3146 pgmPhysInvalidatePageMapTLB(pVM);
3147
3148
3149 /*
3150 * !HACK ALERT! REM + (Shadowed) ROM ==> mess.
3151 *
3152 * If it's shadowed we'll register the handler after the ROM notification
3153 * so we get the access handler callbacks that we should. If it isn't
3154 * shadowed we'll do it the other way around to make REM use the built-in
3155 * ROM behavior and not the handler behavior (which is to route all access
3156 * to PGM atm).
3157 */
3158 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3159 {
3160 REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, true /* fShadowed */);
3161 rc = PGMR3HandlerPhysicalRegister(pVM,
3162 fFlags & PGMPHYS_ROM_FLAGS_SHADOWED
3163 ? PGMPHYSHANDLERTYPE_PHYSICAL_ALL
3164 : PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
3165 GCPhys, GCPhysLast,
3166 pgmR3PhysRomWriteHandler, pRomNew,
3167 NULL, "pgmPhysRomWriteHandler", MMHyperCCToR0(pVM, pRomNew),
3168 NULL, "pgmPhysRomWriteHandler", MMHyperCCToRC(pVM, pRomNew), pszDesc);
3169 }
3170 else
3171 {
3172 rc = PGMR3HandlerPhysicalRegister(pVM,
3173 fFlags & PGMPHYS_ROM_FLAGS_SHADOWED
3174 ? PGMPHYSHANDLERTYPE_PHYSICAL_ALL
3175 : PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
3176 GCPhys, GCPhysLast,
3177 pgmR3PhysRomWriteHandler, pRomNew,
3178 NULL, "pgmPhysRomWriteHandler", MMHyperCCToR0(pVM, pRomNew),
3179 NULL, "pgmPhysRomWriteHandler", MMHyperCCToRC(pVM, pRomNew), pszDesc);
3180 REMR3NotifyPhysRomRegister(pVM, GCPhys, cb, NULL, false /* fShadowed */);
3181 }
3182 if (RT_SUCCESS(rc))
3183 {
3184 /*
3185 * Copy the image over to the virgin pages.
3186 * This must be done after linking in the RAM range.
3187 */
3188 size_t cbBinaryLeft = cbBinary;
3189 PPGMPAGE pRamPage = &pRamNew->aPages[(GCPhys - pRamNew->GCPhys) >> PAGE_SHIFT];
3190 for (uint32_t iPage = 0; iPage < cPages; iPage++, pRamPage++)
3191 {
3192 void *pvDstPage;
3193 rc = pgmPhysPageMap(pVM, pRamPage, GCPhys + (iPage << PAGE_SHIFT), &pvDstPage);
3194 if (RT_FAILURE(rc))
3195 {
3196 VMSetError(pVM, rc, RT_SRC_POS, "Failed to map virgin ROM page at %RGp", GCPhys);
3197 break;
3198 }
3199 if (cbBinaryLeft >= PAGE_SIZE)
3200 {
3201 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), PAGE_SIZE);
3202 cbBinaryLeft -= PAGE_SIZE;
3203 }
3204 else
3205 {
3206 ASMMemZeroPage(pvDstPage); /* (shouldn't be necessary, but can't hurt either) */
3207 if (cbBinaryLeft > 0)
3208 {
3209 memcpy(pvDstPage, (uint8_t const *)pvBinary + ((size_t)iPage << PAGE_SHIFT), cbBinaryLeft);
3210 cbBinaryLeft = 0;
3211 }
3212 }
3213 }
3214 if (RT_SUCCESS(rc))
3215 {
3216 /*
3217 * Initialize the ROM range.
3218 * Note that the Virgin member of the pages has already been initialized above.
3219 */
3220 pRomNew->GCPhys = GCPhys;
3221 pRomNew->GCPhysLast = GCPhysLast;
3222 pRomNew->cb = cb;
3223 pRomNew->fFlags = fFlags;
3224 pRomNew->idSavedState = UINT8_MAX;
3225 pRomNew->cbOriginal = cbBinary;
3226#ifdef VBOX_STRICT
3227 pRomNew->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY
3228 ? pvBinary : RTMemDup(pvBinary, cbBinary);
3229#else
3230 pRomNew->pvOriginal = fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY ? pvBinary : NULL;
3231#endif
3232 pRomNew->pszDesc = pszDesc;
3233
3234 for (unsigned iPage = 0; iPage < cPages; iPage++)
3235 {
3236 PPGMROMPAGE pPage = &pRomNew->aPages[iPage];
3237 pPage->enmProt = PGMROMPROT_READ_ROM_WRITE_IGNORE;
3238 PGM_PAGE_INIT_ZERO(&pPage->Shadow, pVM, PGMPAGETYPE_ROM_SHADOW);
3239 }
3240
3241 /* update the page count stats for the shadow pages. */
3242 if (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3243 {
3244 pVM->pgm.s.cZeroPages += cPages;
3245 pVM->pgm.s.cAllPages += cPages;
3246 }
3247
3248 /*
3249 * Insert the ROM range, tell REM and return successfully.
3250 */
3251 pRomNew->pNextR3 = pRom;
3252 pRomNew->pNextR0 = pRom ? MMHyperCCToR0(pVM, pRom) : NIL_RTR0PTR;
3253 pRomNew->pNextRC = pRom ? MMHyperCCToRC(pVM, pRom) : NIL_RTRCPTR;
3254
3255 if (pRomPrev)
3256 {
3257 pRomPrev->pNextR3 = pRomNew;
3258 pRomPrev->pNextR0 = MMHyperCCToR0(pVM, pRomNew);
3259 pRomPrev->pNextRC = MMHyperCCToRC(pVM, pRomNew);
3260 }
3261 else
3262 {
3263 pVM->pgm.s.pRomRangesR3 = pRomNew;
3264 pVM->pgm.s.pRomRangesR0 = MMHyperCCToR0(pVM, pRomNew);
3265 pVM->pgm.s.pRomRangesRC = MMHyperCCToRC(pVM, pRomNew);
3266 }
3267
3268 pgmPhysInvalidatePageMapTLB(pVM);
3269 GMMR3AllocatePagesCleanup(pReq);
3270 return VINF_SUCCESS;
3271 }
3272
3273 /* bail out */
3274
3275 int rc2 = PGMHandlerPhysicalDeregister(pVM, GCPhys);
3276 AssertRC(rc2);
3277 }
3278
3279 if (!fRamExists)
3280 {
3281 pgmR3PhysUnlinkRamRange2(pVM, pRamNew, pRamPrev);
3282 MMHyperFree(pVM, pRamNew);
3283 }
3284 }
3285 MMHyperFree(pVM, pRomNew);
3286 }
3287
3288 /** @todo Purge the mapping cache or something... */
3289 GMMR3FreeAllocatedPages(pVM, pReq);
3290 GMMR3AllocatePagesCleanup(pReq);
3291 return rc;
3292}
3293
3294
3295/**
3296 * Registers a ROM image.
3297 *
 3298 * Shadowed ROM images require double the amount of backing memory, so
 3299 * don't use that unless you have to. Shadowing of ROM images is a process
3300 * where we can select where the reads go and where the writes go. On real
3301 * hardware the chipset provides means to configure this. We provide
3302 * PGMR3PhysProtectROM() for this purpose.
3303 *
3304 * A read-only copy of the ROM image will always be kept around while we
3305 * will allocate RAM pages for the changes on demand (unless all memory
3306 * is configured to be preallocated).
3307 *
3308 * @returns VBox status.
3309 * @param pVM VM Handle.
3310 * @param pDevIns The device instance owning the ROM.
3311 * @param GCPhys First physical address in the range.
3312 * Must be page aligned!
3313 * @param cb The size of the range (in bytes).
3314 * Must be page aligned!
3315 * @param pvBinary Pointer to the binary data backing the ROM image.
3316 * @param cbBinary The size of the binary data pvBinary points to.
3317 * This must be less or equal to @a cb.
3318 * @param fFlags Mask of flags. PGMPHYS_ROM_FLAGS_SHADOWED
3319 * and/or PGMPHYS_ROM_FLAGS_PERMANENT_BINARY.
3320 * @param pszDesc Pointer to description string. This must not be freed.
3321 *
 3322 * @remark  There is no way to remove the ROM yet, neither automatically on device
 3323 *          cleanup nor manually from the device. This isn't difficult in any way, it's
3324 * just not something we expect to be necessary for a while.
3325 */
3326VMMR3DECL(int) PGMR3PhysRomRegister(PVM pVM, PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPHYS cb,
3327 const void *pvBinary, uint32_t cbBinary, uint32_t fFlags, const char *pszDesc)
3328{
3329 Log(("PGMR3PhysRomRegister: pDevIns=%p GCPhys=%RGp(-%RGp) cb=%RGp pvBinary=%p cbBinary=%#x fFlags=%#x pszDesc=%s\n",
3330 pDevIns, GCPhys, GCPhys + cb, cb, pvBinary, cbBinary, fFlags, pszDesc));
3331 pgmLock(pVM);
3332 int rc = pgmR3PhysRomRegister(pVM, pDevIns, GCPhys, cb, pvBinary, cbBinary, fFlags, pszDesc);
3333 pgmUnlock(pVM);
3334 return rc;
3335}
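
/*
 * A minimal usage sketch for PGMR3PhysRomRegister and PGMR3PhysRomProtect,
 * kept as a comment (not compiled).  The address, size and 'sample' names are
 * made up; real devices normally register their ROMs through the PDM device
 * helpers, registration must happen while the VM is being created, and
 * cbBinary may be smaller than the range (the tail is zero padded).
 *
 *   static int sampleRegisterBios(PVM pVM, PPDMDEVINS pDevIns, const void *pvBios, uint32_t cbBios)
 *   {
 *       return PGMR3PhysRomRegister(pVM, pDevIns, UINT32_C(0x000e0000), _128K, pvBios, cbBios,
 *                                   PGMPHYS_ROM_FLAGS_SHADOWED, "Sample BIOS");
 *   }
 *
 *   // Typically called later, when the guest programs the chipset's shadow
 *   // RAM control register, to let writes reach the shadow RAM pages:
 *   static int sampleOpenShadowRam(PVM pVM)
 *   {
 *       return PGMR3PhysRomProtect(pVM, UINT32_C(0x000e0000), _128K, PGMROMPROT_READ_ROM_WRITE_RAM);
 *   }
 */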
3336
3337
3338/**
3339 * \#PF Handler callback for ROM write accesses.
3340 *
 3341 * @returns VINF_SUCCESS if the handler has carried out the operation.
3342 * @returns VINF_PGM_HANDLER_DO_DEFAULT if the caller should carry out the access operation.
3343 * @param pVM VM Handle.
3344 * @param GCPhys The physical address the guest is writing to.
3345 * @param pvPhys The HC mapping of that address.
3346 * @param pvBuf What the guest is reading/writing.
3347 * @param cbBuf How much it's reading/writing.
3348 * @param enmAccessType The access type.
3349 * @param pvUser User argument.
3350 */
3351static DECLCALLBACK(int) pgmR3PhysRomWriteHandler(PVM pVM, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
3352 PGMACCESSTYPE enmAccessType, void *pvUser)
3353{
3354 PPGMROMRANGE pRom = (PPGMROMRANGE)pvUser;
3355 const uint32_t iPage = (GCPhys - pRom->GCPhys) >> PAGE_SHIFT;
3356 Assert(iPage < (pRom->cb >> PAGE_SHIFT));
3357 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
3358 Log5(("pgmR3PhysRomWriteHandler: %d %c %#08RGp %#04zx\n", pRomPage->enmProt, enmAccessType == PGMACCESSTYPE_READ ? 'R' : 'W', GCPhys, cbBuf));
3359
3360 if (enmAccessType == PGMACCESSTYPE_READ)
3361 {
3362 switch (pRomPage->enmProt)
3363 {
3364 /*
3365 * Take the default action.
3366 */
3367 case PGMROMPROT_READ_ROM_WRITE_IGNORE:
3368 case PGMROMPROT_READ_RAM_WRITE_IGNORE:
3369 case PGMROMPROT_READ_ROM_WRITE_RAM:
3370 case PGMROMPROT_READ_RAM_WRITE_RAM:
3371 return VINF_PGM_HANDLER_DO_DEFAULT;
3372
3373 default:
3374 AssertMsgFailedReturn(("enmProt=%d iPage=%d GCPhys=%RGp\n",
3375 pRom->aPages[iPage].enmProt, iPage, GCPhys),
3376 VERR_INTERNAL_ERROR);
3377 }
3378 }
3379 else
3380 {
3381 Assert(enmAccessType == PGMACCESSTYPE_WRITE);
3382 switch (pRomPage->enmProt)
3383 {
3384 /*
3385 * Ignore writes.
3386 */
3387 case PGMROMPROT_READ_ROM_WRITE_IGNORE:
3388 case PGMROMPROT_READ_RAM_WRITE_IGNORE:
3389 return VINF_SUCCESS;
3390
3391 /*
3392 * Write to the RAM page.
3393 */
3394 case PGMROMPROT_READ_ROM_WRITE_RAM:
3395 case PGMROMPROT_READ_RAM_WRITE_RAM: /* yes this will get here too, it's *way* simpler that way. */
3396 {
 3397                 /* This should be impossible now, pvPhys doesn't work cross page any longer. */
3398 Assert(((GCPhys - pRom->GCPhys + cbBuf - 1) >> PAGE_SHIFT) == iPage);
3399
3400 /*
3401 * Take the lock, do lazy allocation, map the page and copy the data.
3402 *
3403 * Note that we have to bypass the mapping TLB since it works on
3404 * guest physical addresses and entering the shadow page would
3405 * kind of screw things up...
3406 */
3407 int rc = pgmLock(pVM);
3408 AssertRC(rc);
3409
3410 PPGMPAGE pShadowPage = &pRomPage->Shadow;
3411 if (!PGMROMPROT_IS_ROM(pRomPage->enmProt))
3412 {
3413 pShadowPage = pgmPhysGetPage(pVM, GCPhys);
3414 AssertLogRelReturn(pShadowPage, VERR_INTERNAL_ERROR);
3415 }
3416
3417 void *pvDstPage;
3418 rc = pgmPhysPageMakeWritableAndMap(pVM, pShadowPage, GCPhys & X86_PTE_PG_MASK, &pvDstPage);
3419 if (RT_SUCCESS(rc))
3420 {
3421 memcpy((uint8_t *)pvDstPage + (GCPhys & PAGE_OFFSET_MASK), pvBuf, cbBuf);
3422 pRomPage->LiveSave.fWrittenTo = true;
3423 }
3424
3425 pgmUnlock(pVM);
3426 return rc;
3427 }
3428
3429 default:
3430 AssertMsgFailedReturn(("enmProt=%d iPage=%d GCPhys=%RGp\n",
3431 pRom->aPages[iPage].enmProt, iPage, GCPhys),
3432 VERR_INTERNAL_ERROR);
3433 }
3434 }
3435}
3436
3437
3438/**
3439 * Called by PGMR3Reset to reset the shadow, switch to the virgin,
3440 * and verify that the virgin part is untouched.
3441 *
3442 * This is done after the normal memory has been cleared.
3443 *
3444 * ASSUMES that the caller owns the PGM lock.
3445 *
3446 * @param pVM The VM handle.
3447 */
3448int pgmR3PhysRomReset(PVM pVM)
3449{
3450 PGM_LOCK_ASSERT_OWNER(pVM);
3451 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3452 {
3453 const uint32_t cPages = pRom->cb >> PAGE_SHIFT;
3454
3455 if (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED)
3456 {
3457 /*
3458 * Reset the physical handler.
3459 */
3460 int rc = PGMR3PhysRomProtect(pVM, pRom->GCPhys, pRom->cb, PGMROMPROT_READ_ROM_WRITE_IGNORE);
3461 AssertRCReturn(rc, rc);
3462
3463 /*
3464 * What we do with the shadow pages depends on the memory
3465 * preallocation option. If not enabled, we'll just throw
3466 * out all the dirty pages and replace them by the zero page.
3467 */
3468 if (!pVM->pgm.s.fRamPreAlloc)
3469 {
3470 /* Free the dirty pages. */
3471 uint32_t cPendingPages = 0;
3472 PGMMFREEPAGESREQ pReq;
3473 rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
3474 AssertRCReturn(rc, rc);
3475
3476 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3477 if ( !PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow)
3478 && !PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow))
3479 {
3480 Assert(PGM_PAGE_GET_STATE(&pRom->aPages[iPage].Shadow) == PGM_PAGE_STATE_ALLOCATED);
3481 rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &pRom->aPages[iPage].Shadow,
3482 pRom->GCPhys + (iPage << PAGE_SHIFT));
3483 AssertLogRelRCReturn(rc, rc);
3484 }
3485
3486 if (cPendingPages)
3487 {
3488 rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
3489 AssertLogRelRCReturn(rc, rc);
3490 }
3491 GMMR3FreePagesCleanup(pReq);
3492 }
3493 else
3494 {
3495 /* clear all the shadow pages. */
3496 for (uint32_t iPage = 0; iPage < cPages; iPage++)
3497 {
3498 if (PGM_PAGE_IS_ZERO(&pRom->aPages[iPage].Shadow))
3499 continue;
3500 Assert(!PGM_PAGE_IS_BALLOONED(&pRom->aPages[iPage].Shadow));
3501 void *pvDstPage;
3502 const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT);
3503 rc = pgmPhysPageMakeWritableAndMap(pVM, &pRom->aPages[iPage].Shadow, GCPhys, &pvDstPage);
3504 if (RT_FAILURE(rc))
3505 break;
3506 ASMMemZeroPage(pvDstPage);
3507 }
3508 AssertRCReturn(rc, rc);
3509 }
3510 }
3511
3512#ifdef VBOX_STRICT
3513 /*
3514 * Verify that the virgin page is unchanged if possible.
3515 */
3516 if (pRom->pvOriginal)
3517 {
3518 size_t cbSrcLeft = pRom->cbOriginal;
3519 uint8_t const *pbSrcPage = (uint8_t const *)pRom->pvOriginal;
3520 for (uint32_t iPage = 0; iPage < cPages && cbSrcLeft > 0; iPage++, pbSrcPage += PAGE_SIZE)
3521 {
3522 const RTGCPHYS GCPhys = pRom->GCPhys + (iPage << PAGE_SHIFT);
3523 void const *pvDstPage;
3524 int rc = pgmPhysPageMapReadOnly(pVM, &pRom->aPages[iPage].Virgin, GCPhys, &pvDstPage);
3525 if (RT_FAILURE(rc))
3526 break;
3527
3528 if (memcmp(pvDstPage, pbSrcPage, RT_MIN(cbSrcLeft, PAGE_SIZE)))
3529 LogRel(("pgmR3PhysRomReset: %RGp rom page changed (%s) - loaded saved state?\n",
3530 GCPhys, pRom->pszDesc));
3531 cbSrcLeft -= RT_MIN(cbSrcLeft, PAGE_SIZE);
3532 }
3533 }
3534#endif
3535 }
3536
3537 return VINF_SUCCESS;
3538}
3539
3540
3541/**
3542 * Called by PGMR3Term to free resources.
3543 *
3544 * ASSUMES that the caller owns the PGM lock.
3545 *
3546 * @param pVM The VM handle.
3547 */
3548void pgmR3PhysRomTerm(PVM pVM)
3549{
3550#ifdef RT_STRICT
3551 /*
3552 * Free the heap copy of the original bits.
3553 */
3554 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3555 {
3556 if ( pRom->pvOriginal
3557 && !(pRom->fFlags & PGMPHYS_ROM_FLAGS_PERMANENT_BINARY))
3558 {
3559 RTMemFree((void *)pRom->pvOriginal);
3560 pRom->pvOriginal = NULL;
3561 }
3562 }
3563#endif
3564}
3565
3566
3567/**
3568 * Change the shadowing of a range of ROM pages.
3569 *
3570 * This is intended for implementing chipset-specific memory registers
3571 * and will not be very strict about the input. It will silently ignore
3572 * any pages that are not part of a shadowed ROM.
3573 *
3574 * @returns VBox status code.
3575 * @retval VINF_PGM_SYNC_CR3
3576 *
3577 * @param pVM Pointer to the shared VM structure.
3578 * @param GCPhys Where to start. Page aligned.
3579 * @param cb How much to change. Page aligned.
3580 * @param enmProt The new ROM protection.
3581 */
3582VMMR3DECL(int) PGMR3PhysRomProtect(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, PGMROMPROT enmProt)
3583{
3584 /*
3585 * Check input
3586 */
3587 if (!cb)
3588 return VINF_SUCCESS;
3589 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3590 AssertReturn(!(cb & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3591 RTGCPHYS GCPhysLast = GCPhys + (cb - 1);
3592 AssertReturn(GCPhysLast > GCPhys, VERR_INVALID_PARAMETER);
3593    AssertReturn(enmProt > PGMROMPROT_INVALID && enmProt < PGMROMPROT_END, VERR_INVALID_PARAMETER);
3594
3595 /*
3596 * Process the request.
3597 */
3598 pgmLock(pVM);
3599 int rc = VINF_SUCCESS;
3600 bool fFlushTLB = false;
3601 for (PPGMROMRANGE pRom = pVM->pgm.s.pRomRangesR3; pRom; pRom = pRom->pNextR3)
3602 {
3603 if ( GCPhys <= pRom->GCPhysLast
3604 && GCPhysLast >= pRom->GCPhys
3605 && (pRom->fFlags & PGMPHYS_ROM_FLAGS_SHADOWED))
3606 {
3607 /*
3608             * Iterate the relevant pages and make the necessary changes.
3609 */
3610 bool fChanges = false;
3611 uint32_t const cPages = pRom->GCPhysLast <= GCPhysLast
3612 ? pRom->cb >> PAGE_SHIFT
3613 : (GCPhysLast - pRom->GCPhys + 1) >> PAGE_SHIFT;
3614 for (uint32_t iPage = (GCPhys - pRom->GCPhys) >> PAGE_SHIFT;
3615 iPage < cPages;
3616 iPage++)
3617 {
3618 PPGMROMPAGE pRomPage = &pRom->aPages[iPage];
3619 if (PGMROMPROT_IS_ROM(pRomPage->enmProt) != PGMROMPROT_IS_ROM(enmProt))
3620 {
3621 fChanges = true;
3622
3623 /* flush references to the page. */
3624 PPGMPAGE pRamPage = pgmPhysGetPage(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT));
3625 int rc2 = pgmPoolTrackUpdateGCPhys(pVM, pRom->GCPhys + (iPage << PAGE_SHIFT), pRamPage,
3626 true /*fFlushPTEs*/, &fFlushTLB);
3627 if (rc2 != VINF_SUCCESS && (rc == VINF_SUCCESS || RT_FAILURE(rc2)))
3628 rc = rc2;
3629
3630 PPGMPAGE pOld = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Virgin : &pRomPage->Shadow;
3631 PPGMPAGE pNew = PGMROMPROT_IS_ROM(pRomPage->enmProt) ? &pRomPage->Shadow : &pRomPage->Virgin;
3632
3633 *pOld = *pRamPage;
3634 *pRamPage = *pNew;
3635 /** @todo preserve the volatile flags (handlers) when these have been moved out of HCPhys! */
3636 }
3637 pRomPage->enmProt = enmProt;
3638 }
3639
3640 /*
3641 * Reset the access handler if we made changes, no need
3642 * to optimize this.
3643 */
3644 if (fChanges)
3645 {
3646 int rc2 = PGMHandlerPhysicalReset(pVM, pRom->GCPhys);
3647 if (RT_FAILURE(rc2))
3648 {
3649 pgmUnlock(pVM);
3650 AssertRC(rc);
3651 return rc2;
3652 }
3653 }
3654
3655 /* Advance - cb isn't updated. */
3656 GCPhys = pRom->GCPhys + (cPages << PAGE_SHIFT);
3657 }
3658 }
3659 pgmUnlock(pVM);
3660 if (fFlushTLB)
3661 PGM_INVL_ALL_VCPU_TLBS(pVM);
3662
3663 return rc;
3664}
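
The doxygen above says this API is meant for chipset-specific shadow-RAM control registers; below is a minimal, hypothetical sketch of such a caller. The function name, register layout, and the 0xE0000/64 KB region are illustrative assumptions, PGMROMPROT_READ_RAM_WRITE_RAM is assumed to be one of the other members of the PGMROMPROT enumeration (only PGMROMPROT_READ_ROM_WRITE_IGNORE appears in this file), and a real device would normally go through a PDM device helper rather than calling PGM directly.

/* Hypothetical PAM-style shadowing control: bit 0 of the register selects
 * whether the 64 KB region at 0xE0000 shows the ROM (writes ignored) or the
 * writable RAM shadow copy.  Purely illustrative. */
static int examplePamControlWrite(PVM pVM, uint8_t bVal)
{
    PGMROMPROT const enmProt = (bVal & 1)
                             ? PGMROMPROT_READ_RAM_WRITE_RAM     /* shadow RAM readable and writable */
                             : PGMROMPROT_READ_ROM_WRITE_IGNORE; /* ROM readable, writes dropped */
    int rc = PGMR3PhysRomProtect(pVM, 0xe0000, 0x10000, enmProt);
    /* VINF_PGM_SYNC_CR3 only indicates that the shadow page tables need
     * resyncing; any success status can be treated as success here. */
    return RT_SUCCESS(rc) ? VINF_SUCCESS : rc;
}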
3665
3666
3667/**
3668 * Sets the Address Gate 20 state.
3669 *
3670 * @param pVCpu The VCPU to operate on.
3671 * @param fEnable True if the gate should be enabled.
3672 * False if the gate should be disabled.
3673 */
3674VMMDECL(void) PGMR3PhysSetA20(PVMCPU pVCpu, bool fEnable)
3675{
3676 LogFlow(("PGMR3PhysSetA20 %d (was %d)\n", fEnable, pVCpu->pgm.s.fA20Enabled));
3677 if (pVCpu->pgm.s.fA20Enabled != fEnable)
3678 {
3679 pVCpu->pgm.s.fA20Enabled = fEnable;
3680 pVCpu->pgm.s.GCPhysA20Mask = ~(RTGCPHYS)(!fEnable << 20);
3681 REMR3A20Set(pVCpu->pVMR3, pVCpu, fEnable);
3682 /** @todo we're not handling this correctly for VT-x / AMD-V. See #2911 */
3683 }
3684}
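
The GCPhysA20Mask expression above is compact; the standalone sketch below (plain C, using uint64_t as a stand-in for RTGCPHYS, which is an assumption about the type) simply evaluates it for both gate states to show the two masks that result.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for RTGCPHYS: a 64-bit guest physical address. */
typedef uint64_t EXAMPLEGCPHYS;

/* Same computation as in PGMR3PhysSetA20: ~(RTGCPHYS)(!fEnable << 20). */
static EXAMPLEGCPHYS exampleCalcA20Mask(bool fEnable)
{
    return ~(EXAMPLEGCPHYS)(!fEnable << 20);
}

int main(void)
{
    /* Gate enabled:  !fEnable == 0, nothing is masked off.            */
    /* Gate disabled: !fEnable == 1, bit 20 is cleared from addresses. */
    printf("A20 enabled : %#018llx\n", (unsigned long long)exampleCalcA20Mask(true));
    printf("A20 disabled: %#018llx\n", (unsigned long long)exampleCalcA20Mask(false));
    return 0;
}

Enabled yields an all-ones mask (guest physical addresses pass through unchanged); disabled yields 0xffffffffffefffff, which clears bit 20 so that accesses wrap at 1 MB the way real A20 gating does.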
3685
3686#ifdef PGM_WITH_LARGE_ADDRESS_SPACE_ON_32_BIT_HOST
3687/**
3688 * Tree enumeration callback for dealing with age rollover.
3689 * It will perform a simple compression of the current age.
3690 */
3691static DECLCALLBACK(int) pgmR3PhysChunkAgeingRolloverCallback(PAVLU32NODECORE pNode, void *pvUser)
3692{
3693    PVM pVM = (PVM)pvUser; NOREF(pVM);
3694    PGM_LOCK_ASSERT_OWNER(pVM);
3695 /* Age compression - ASSUMES iNow == 4. */
3696 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
3697 if (pChunk->iAge >= UINT32_C(0xffffff00))
3698 pChunk->iAge = 3;
3699 else if (pChunk->iAge >= UINT32_C(0xfffff000))
3700 pChunk->iAge = 2;
3701 else if (pChunk->iAge)
3702 pChunk->iAge = 1;
3703 else /* iAge = 0 */
3704 pChunk->iAge = 4;
3705 return 0;
3706}
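
To make the "ASSUMES iNow == 4" compression above concrete, here is a self-contained sketch (plain uint32_t, no PGM types) that applies the same mapping to a few representative age values: the non-zero ages are squashed into three buckets (1, 2, 3) that keep their relative order, while a zero age is stamped with the new iNow value of 4.

#include <stdint.h>
#include <stdio.h>

/* Same mapping as pgmR3PhysChunkAgeingRolloverCallback. */
static uint32_t exampleCompressAge(uint32_t iAge)
{
    if (iAge >= UINT32_C(0xffffff00))
        return 3;
    if (iAge >= UINT32_C(0xfffff000))
        return 2;
    if (iAge)
        return 1;
    return 4; /* iAge == 0: gets the new iNow */
}

int main(void)
{
    static const uint32_t s_aAges[] =
    { 0, 1, 0x1000, 0xfffff000, 0xfffffeff, 0xffffff00, 0xffffffff };
    for (unsigned i = 0; i < sizeof(s_aAges) / sizeof(s_aAges[0]); i++)
        printf("%#010x -> %u\n", (unsigned)s_aAges[i], (unsigned)exampleCompressAge(s_aAges[i]));
    return 0;
}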
3707
3708
3709/**
3710 * Tree enumeration callback that updates the chunks that have
3711 * been used since the last ageing pass.
3712 */
3713static DECLCALLBACK(int) pgmR3PhysChunkAgeingCallback(PAVLU32NODECORE pNode, void *pvUser)
3714{
3715 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
3716 if (!pChunk->iAge)
3717 {
3718 PVM pVM = (PVM)pvUser;
3719 pChunk->iAge = pVM->pgm.s.ChunkR3Map.iNow;
3720 }
3721 return 0;
3722}
3723
3724
3725/**
3726 * Performs ageing of the ring-3 chunk mappings.
3727 *
3728 * @param pVM The VM handle.
3729 */
3730VMMR3DECL(void) PGMR3PhysChunkAgeing(PVM pVM)
3731{
3732 pgmLock(pVM);
3733 pVM->pgm.s.ChunkR3Map.AgeingCountdown = RT_MIN(pVM->pgm.s.ChunkR3Map.cMax / 4, 1024);
3734 pVM->pgm.s.ChunkR3Map.iNow++;
3735 if (pVM->pgm.s.ChunkR3Map.iNow == 0)
3736 {
3737 pVM->pgm.s.ChunkR3Map.iNow = 4;
3738 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingRolloverCallback, pVM);
3739 }
3740 else
3741 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkAgeingCallback, pVM);
3742 pgmUnlock(pVM);
3743}
3744
3745
3746/**
3747 * The structure passed in the pvUser argument of pgmR3PhysChunkUnmapCandidateCallback().
3748 */
3749typedef struct PGMR3PHYSCHUNKUNMAPCB
3750{
3751 PVM pVM; /**< The VM handle. */
3752 PPGMCHUNKR3MAP pChunk; /**< The chunk to unmap. */
3753 uint32_t iLastAge; /**< Highest age found so far. */
3754} PGMR3PHYSCHUNKUNMAPCB, *PPGMR3PHYSCHUNKUNMAPCB;
3755
3756
3757/**
3758 * Callback used to find the mapping that's been unused for
3759 * the longest time.
3760 */
3761static DECLCALLBACK(int) pgmR3PhysChunkUnmapCandidateCallback(PAVLU32NODECORE pNode, void *pvUser)
3762{
3763 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)pNode;
3764 PPGMR3PHYSCHUNKUNMAPCB pArg = (PPGMR3PHYSCHUNKUNMAPCB)pvUser;
3765
3766 if ( pChunk->iAge
3767 && !pChunk->cRefs
3768 && pArg->iLastAge < pChunk->iAge)
3769 {
3770 /*
3771 * Check that it's not in any of the TLBs.
3772 */
3773 PVM pVM = pArg->pVM;
3774 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
3775 if (pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk == pChunk)
3776 {
3777 pChunk = NULL;
3778 break;
3779 }
3780 if (pChunk)
3781 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.PhysTlbHC.aEntries); i++)
3782 if (pVM->pgm.s.PhysTlbHC.aEntries[i].pMap == pChunk)
3783 {
3784 pChunk = NULL;
3785 break;
3786 }
3787 if (pChunk)
3788 {
3789 pArg->pChunk = pChunk;
3790 pArg->iLastAge = pChunk->iAge;
3791 }
3792 }
3793 return 0;
3794}
3795
3796
3797/**
3798 * Finds a good candidate for unmapping when the ring-3 mapping cache is full.
3799 *
3800 * The candidate will not be part of any TLBs, so no need to flush
3801 * anything afterwards.
3802 *
3803 * @returns Chunk id, or INT32_MAX if no suitable candidate was found.
3804 * @param pVM The VM handle.
3805 */
3806static int32_t pgmR3PhysChunkFindUnmapCandidate(PVM pVM)
3807{
3808 PGM_LOCK_ASSERT_OWNER(pVM);
3809
3810 /*
3811 * Do tree ageing first?
3812 */
3813 if (pVM->pgm.s.ChunkR3Map.AgeingCountdown-- == 0)
3814 {
3815 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkAging, a);
3816 PGMR3PhysChunkAgeing(pVM);
3817 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkAging, a);
3818 }
3819
3820 /*
3821 * Enumerate the age tree starting with the left most node.
3822 */
3823 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3824 PGMR3PHYSCHUNKUNMAPCB Args;
3825 Args.pVM = pVM;
3826 Args.pChunk = NULL;
3827 Args.iLastAge = 0;
3828 RTAvlU32DoWithAll(&pVM->pgm.s.ChunkR3Map.pTree, true /*fFromLeft*/, pgmR3PhysChunkUnmapCandidateCallback, &Args);
3829 Assert(Args.pChunk);
3830 if (Args.pChunk)
3831 {
3832 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3833 return Args.pChunk->Core.Key;
3834 }
3835
3836 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkFindCandidate, a);
3837 return INT32_MAX;
3838}
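
The selection rule driven from here (pick the unreferenced mapping with the highest age, skipping anything still referenced or present in a TLB) can be modelled without any PGM types; the standalone sketch below walks a plain array instead of the AVL tree and omits the TLB checks. All names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Minimal model of a ring-3 chunk mapping: iAge is the ageing stamp
 * (0 = used since the last ageing pass), cRefs is the reference count. */
typedef struct EXAMPLECHUNK
{
    uint32_t idChunk;
    uint32_t iAge;
    uint32_t cRefs;
} EXAMPLECHUNK;

/* Mirrors the rule in pgmR3PhysChunkUnmapCandidateCallback, minus the TLB
 * checks: among chunks with a non-zero age and no references, pick the one
 * with the highest age.  Returns -1 if there is no candidate. */
static int32_t exampleFindUnmapCandidate(const EXAMPLECHUNK *paChunks, unsigned cChunks)
{
    int32_t  idBest   = -1;
    uint32_t iLastAge = 0;
    for (unsigned i = 0; i < cChunks; i++)
        if (   paChunks[i].iAge
            && !paChunks[i].cRefs
            && iLastAge < paChunks[i].iAge)
        {
            idBest   = (int32_t)paChunks[i].idChunk;
            iLastAge = paChunks[i].iAge;
        }
    return idBest;
}

int main(void)
{
    static const EXAMPLECHUNK s_aChunks[] =
    {
        { 1, 0, 0 }, /* iAge == 0: used since the last ageing pass, skipped */
        { 2, 7, 1 }, /* still referenced, skipped                           */
        { 3, 5, 0 }, /* candidate                                           */
        { 4, 9, 0 }, /* candidate with the highest age, wins                */
    };
    printf("unmap candidate: chunk %d\n",
           (int)exampleFindUnmapCandidate(s_aChunks, sizeof(s_aChunks) / sizeof(s_aChunks[0])));
    return 0;
}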
3839
3840/**
3841 * Rendezvous callback used by pgmR3PhysUnmapChunk that unmaps a chunk
3842 *
3843 * This is only called on one of the EMTs while the other ones are waiting for
3844 * it to complete this function.
3845 *
3846 * @returns VINF_SUCCESS (VBox strict status code).
3847 * @param pVM The VM handle.
3848 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
3849 * @param pvUser User pointer. Unused
3850 *
3851 */
3852DECLCALLBACK(VBOXSTRICTRC) pgmR3PhysUnmapChunkRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
3853{
3854 int rc = VINF_SUCCESS;
3855 pgmLock(pVM);
3856
3857 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
3858 {
3859 /* Flush the pgm pool cache; call the internal rendezvous handler as we're already in a rendezvous handler here. */
3860 /* todo: also not really efficient to unmap a chunk that contains PD or PT pages. */
3861 pgmR3PoolClearAllRendezvous(pVM, &pVM->aCpus[0], NULL /* no need to flush the REM TLB as we already did that above */);
3862
3863 /*
3864 * Request the ring-0 part to unmap a chunk to make space in the mapping cache.
3865 */
3866 GMMMAPUNMAPCHUNKREQ Req;
3867 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3868 Req.Hdr.cbReq = sizeof(Req);
3869 Req.pvR3 = NULL;
3870 Req.idChunkMap = NIL_GMM_CHUNKID;
3871 Req.idChunkUnmap = pgmR3PhysChunkFindUnmapCandidate(pVM);
3872
3873 if (Req.idChunkUnmap != INT32_MAX)
3874 {
3875 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a);
3876 rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
3877 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkUnmap, a);
3878 if (RT_SUCCESS(rc))
3879 {
3880 /* remove the unmapped one. */
3881 PPGMCHUNKR3MAP pUnmappedChunk = (PPGMCHUNKR3MAP)RTAvlU32Remove(&pVM->pgm.s.ChunkR3Map.pTree, Req.idChunkUnmap);
3882 AssertRelease(pUnmappedChunk);
3883 pUnmappedChunk->pv = NULL;
3884 pUnmappedChunk->Core.Key = UINT32_MAX;
3885#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
3886 MMR3HeapFree(pUnmappedChunk);
3887#else
3888 MMR3UkHeapFree(pVM, pUnmappedChunk, MM_TAG_PGM_CHUNK_MAPPING);
3889#endif
3890 pVM->pgm.s.ChunkR3Map.c--;
3891 pVM->pgm.s.cUnmappedChunks++;
3892
3893 /* Flush dangling PGM pointers (R3 & R0 ptrs to GC physical addresses) */
3894 /* todo: we should not flush chunks which include cr3 mappings. */
3895 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3896 {
3897 PPGMCPU pPGM = &pVM->aCpus[idCpu].pgm.s;
3898
3899 pPGM->pGst32BitPdR3 = NULL;
3900 pPGM->pGstPaePdptR3 = NULL;
3901 pPGM->pGstAmd64Pml4R3 = NULL;
3902#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
3903 pPGM->pGst32BitPdR0 = NIL_RTR0PTR;
3904 pPGM->pGstPaePdptR0 = NIL_RTR0PTR;
3905 pPGM->pGstAmd64Pml4R0 = NIL_RTR0PTR;
3906#endif
3907 for (unsigned i = 0; i < RT_ELEMENTS(pPGM->apGstPaePDsR3); i++)
3908 {
3909 pPGM->apGstPaePDsR3[i] = NULL;
3910#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
3911 pPGM->apGstPaePDsR0[i] = NIL_RTR0PTR;
3912#endif
3913 }
3914
3915 /* Flush REM TLBs. */
3916 CPUMSetChangedFlags(&pVM->aCpus[idCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
3917 }
3918
3919 /* Flush REM translation blocks. */
3920 REMFlushTBs(pVM);
3921 }
3922 }
3923 }
3924 pgmUnlock(pVM);
3925 return rc;
3926}
3927
3928/**
3929 * Unmap a chunk to free up virtual address space (request packet handler for pgmR3PhysChunkMap)
3930 *
3931 * @returns VBox status code.
3932 * @param pVM The VM to operate on.
3933 */
3934void pgmR3PhysUnmapChunk(PVM pVM)
3935{
3936 int rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, pgmR3PhysUnmapChunkRendezvous, NULL);
3937 AssertRC(rc);
3938}
3939#endif /* PGM_WITH_LARGE_ADDRESS_SPACE_ON_32_BIT_HOST */
3940
3941/**
3942 * Maps the given chunk into the ring-3 mapping cache.
3943 *
3944 * This will call ring-0.
3945 *
3946 * @returns VBox status code.
3947 * @param pVM The VM handle.
3948 * @param idChunk The chunk in question.
3949 * @param ppChunk Where to store the chunk tracking structure.
3950 *
3951 * @remarks Called from within the PGM critical section.
3952 * @remarks Can be called from any thread!
3953 */
3954int pgmR3PhysChunkMap(PVM pVM, uint32_t idChunk, PPPGMCHUNKR3MAP ppChunk)
3955{
3956 int rc;
3957
3958 PGM_LOCK_ASSERT_OWNER(pVM);
3959
3960 /*
3961 * Allocate a new tracking structure first.
3962 */
3963#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
3964 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3HeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk));
3965#else
3966 PPGMCHUNKR3MAP pChunk = (PPGMCHUNKR3MAP)MMR3UkHeapAllocZ(pVM, MM_TAG_PGM_CHUNK_MAPPING, sizeof(*pChunk), NULL);
3967#endif
3968 AssertReturn(pChunk, VERR_NO_MEMORY);
3969 pChunk->Core.Key = idChunk;
3970
3971 /*
3972 * Request the ring-0 part to map the chunk in question.
3973 */
3974 GMMMAPUNMAPCHUNKREQ Req;
3975 Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
3976 Req.Hdr.cbReq = sizeof(Req);
3977 Req.pvR3 = NULL;
3978 Req.idChunkMap = idChunk;
3979 Req.idChunkUnmap = NIL_GMM_CHUNKID;
3980
3981 /* Must be callable from any thread, so can't use VMMR3CallR0. */
3982 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a);
3983 rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
3984 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatChunkMap, a);
3985 if (RT_SUCCESS(rc))
3986 {
3987 /*
3988 * Update the tree.
3989 */
3990 /* insert the new one. */
3991 AssertPtr(Req.pvR3);
3992 pChunk->pv = Req.pvR3;
3993 bool fRc = RTAvlU32Insert(&pVM->pgm.s.ChunkR3Map.pTree, &pChunk->Core);
3994 AssertRelease(fRc);
3995 pVM->pgm.s.ChunkR3Map.c++;
3996 pVM->pgm.s.cMappedChunks++;
3997
3998 /* If we're running out of virtual address space, then we should unmap another chunk. */
3999 if (pVM->pgm.s.ChunkR3Map.c >= pVM->pgm.s.ChunkR3Map.cMax)
4000 {
4001#ifdef PGM_WITH_LARGE_ADDRESS_SPACE_ON_32_BIT_HOST
4002 /* Postpone the unmap operation (which requires a rendezvous operation) as we own the PGM lock here. */
4003 rc = VMR3ReqCallNoWaitU(pVM->pUVM, VMCPUID_ANY_QUEUE, (PFNRT)pgmR3PhysUnmapChunk, 1, pVM);
4004 AssertRC(rc);
4005#else
4006 AssertFatalFailed(); /* can't happen */
4007#endif
4008 }
4009 }
4010 else
4011 {
4012 /** @todo this may fail because of /proc/sys/vm/max_map_count, so we
4013 * should probably restrict ourselves on linux. */
4014 AssertRC(rc);
4015#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
4016 MMR3HeapFree(pChunk);
4017#else
4018 MMR3UkHeapFree(pVM, pChunk, MM_TAG_PGM_CHUNK_MAPPING);
4019#endif
4020 pChunk = NULL;
4021 }
4022
4023 *ppChunk = pChunk;
4024 return rc;
4025}
4026
4027
4028/**
4029 * For VMMCALLRING3_PGM_MAP_CHUNK, considered internal.
4030 *
4031 * @returns see pgmR3PhysChunkMap.
4032 * @param pVM The VM handle.
4033 * @param idChunk The chunk to map.
4034 */
4035VMMR3DECL(int) PGMR3PhysChunkMap(PVM pVM, uint32_t idChunk)
4036{
4037 PPGMCHUNKR3MAP pChunk;
4038 int rc;
4039
4040 pgmLock(pVM);
4041 rc = pgmR3PhysChunkMap(pVM, idChunk, &pChunk);
4042 pgmUnlock(pVM);
4043 return rc;
4044}
4045
4046
4047/**
4048 * Invalidates the TLB for the ring-3 mapping cache.
4049 *
4050 * @param pVM The VM handle.
4051 */
4052VMMR3DECL(void) PGMR3PhysChunkInvalidateTLB(PVM pVM)
4053{
4054 pgmLock(pVM);
4055 for (unsigned i = 0; i < RT_ELEMENTS(pVM->pgm.s.ChunkR3Map.Tlb.aEntries); i++)
4056 {
4057 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].idChunk = NIL_GMM_CHUNKID;
4058 pVM->pgm.s.ChunkR3Map.Tlb.aEntries[i].pChunk = NULL;
4059 }
4060 /* The page map TLB references chunks, so invalidate that one too. */
4061 pgmPhysInvalidatePageMapTLB(pVM);
4062 pgmUnlock(pVM);
4063}
4064
4065
4066/**
4067 * Response to VMMCALLRING3_PGM_ALLOCATE_LARGE_PAGE to allocate a large (2MB) page
4068 * for use with a nested paging PDE.
4069 *
4070 * @returns The following VBox status codes.
4071 * @retval VINF_SUCCESS on success.
4072 * @retval VINF_EM_NO_MEMORY if we're out of memory.
4073 *
4074 * @param pVM The VM handle.
4075 * @param GCPhys GC physical start address of the 2 MB range
4076 */
4077VMMR3DECL(int) PGMR3PhysAllocateLargeHandyPage(PVM pVM, RTGCPHYS GCPhys)
4078{
4079#ifdef PGM_WITH_LARGE_PAGES
4080 uint64_t u64TimeStamp1, u64TimeStamp2;
4081
4082 pgmLock(pVM);
4083
4084 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a);
4085 u64TimeStamp1 = RTTimeMilliTS();
4086 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_LARGE_HANDY_PAGE, 0, NULL);
4087 u64TimeStamp2 = RTTimeMilliTS();
4088 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatAllocLargePage, a);
4089 if (RT_SUCCESS(rc))
4090 {
4091 Assert(pVM->pgm.s.cLargeHandyPages == 1);
4092
4093 uint32_t idPage = pVM->pgm.s.aLargeHandyPage[0].idPage;
4094 RTHCPHYS HCPhys = pVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys;
4095
4096 void *pv;
4097
4098 /* Map the large page into our address space.
4099 *
4100          * Note: assuming that within the 2 MB range:
4101          * - the guest page at GCPhys + PAGE_SIZE is backed by HCPhys + PAGE_SIZE (the whole point of this exercise),
4102          * - the user space mapping is contiguous as well,
4103          * - page id (GCPhys) + 1 = page id (GCPhys + PAGE_SIZE).
4104 */
4105 rc = pgmPhysPageMapByPageID(pVM, idPage, HCPhys, &pv);
4106 AssertLogRelMsg(RT_SUCCESS(rc), ("idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n", idPage, HCPhys, rc));
4107
4108 if (RT_SUCCESS(rc))
4109 {
4110 /*
4111 * Clear the pages.
4112 */
4113 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b);
4114 for (unsigned i = 0; i < _2M/PAGE_SIZE; i++)
4115 {
4116 ASMMemZeroPage(pv);
4117
4118 PPGMPAGE pPage;
4119 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
4120 AssertRC(rc);
4121
4122 Assert(PGM_PAGE_IS_ZERO(pPage));
4123 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatRZPageReplaceZero);
4124 pVM->pgm.s.cZeroPages--;
4125
4126 /*
4127 * Do the PGMPAGE modifications.
4128 */
4129 pVM->pgm.s.cPrivatePages++;
4130 PGM_PAGE_SET_HCPHYS(pVM, pPage, HCPhys);
4131 PGM_PAGE_SET_PAGEID(pVM, pPage, idPage);
4132 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ALLOCATED);
4133 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
4134 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
4135 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
4136
4137 /* Somewhat dirty assumption that page ids are increasing. */
4138 idPage++;
4139
4140 HCPhys += PAGE_SIZE;
4141 GCPhys += PAGE_SIZE;
4142
4143 pv = (void *)((uintptr_t)pv + PAGE_SIZE);
4144
4145                 Log3(("PGMR3PhysAllocateLargePage: idPage=%#x HCPhys=%RHp\n", idPage, HCPhys));
4146 }
4147 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pStats)->StatClearLargePage, b);
4148
4149 /* Flush all TLBs. */
4150 PGM_INVL_ALL_VCPU_TLBS(pVM);
4151 pgmPhysInvalidatePageMapTLB(pVM);
4152 }
4153 pVM->pgm.s.cLargeHandyPages = 0;
4154 }
4155
4156 if (RT_SUCCESS(rc))
4157 {
4158 static uint32_t cTimeOut = 0;
4159 uint64_t u64TimeStampDelta = u64TimeStamp2 - u64TimeStamp1;
4160
4161 if (u64TimeStampDelta > 100)
4162 {
4163 STAM_COUNTER_INC(&pVM->pgm.s.CTX_SUFF(pStats)->StatLargePageOverflow);
4164 if ( ++cTimeOut > 10
4165 || u64TimeStampDelta > 1000 /* more than one second forces an early retirement from allocating large pages. */)
4166 {
4167                     /* If repeated attempts to allocate a large page take more than 100 ms, then we fall back to normal 4k pages.
4168                      * E.g. Vista 64 tries to move memory around, which takes a huge amount of time.
4169                      */
4170                     LogRel(("PGMR3PhysAllocateLargePage: allocating large pages takes too long (last attempt %RU64 ms; nr of timeouts %d); DISABLE\n", u64TimeStampDelta, cTimeOut));
4171 PGMSetLargePageUsage(pVM, false);
4172 }
4173 }
4174 else
4175 if (cTimeOut > 0)
4176 cTimeOut--;
4177 }
4178
4179 pgmUnlock(pVM);
4180 return rc;
4181#else
4182 return VERR_NOT_IMPLEMENTED;
4183#endif /* PGM_WITH_LARGE_PAGES */
4184}
4185
4186
4187/**
4188 * Response to VM_FF_PGM_NEED_HANDY_PAGES and VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES.
4189 *
4190 * This function will also work the VM_FF_PGM_NO_MEMORY force action flag, to
4191 * signal and clear the out of memory condition. When contracted, this API
4192 * is used to try to clear the condition when the user wants to resume.
4193 *
4194 * @returns The following VBox status codes.
4195 * @retval VINF_SUCCESS on success. FFs cleared.
4196 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is not cleared in
4197 * this case and it gets accompanied by VM_FF_PGM_NO_MEMORY.
4198 *
4199 * @param pVM The VM handle.
4200 *
4201 * @remarks The VINF_EM_NO_MEMORY status is for the benefit of the FF processing
4202 * in EM.cpp and shouldn't be propagated outside TRPM, HWACCM, EM and
4203 * pgmPhysEnsureHandyPage. There is one exception to this in the \#PF
4204 * handler.
4205 */
4206VMMR3DECL(int) PGMR3PhysAllocateHandyPages(PVM pVM)
4207{
4208 pgmLock(pVM);
4209
4210 /*
4211 * Allocate more pages, noting down the index of the first new page.
4212 */
4213 uint32_t iClear = pVM->pgm.s.cHandyPages;
4214 AssertMsgReturn(iClear <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), ("%d", iClear), VERR_INTERNAL_ERROR);
4215 Log(("PGMR3PhysAllocateHandyPages: %d -> %d\n", iClear, RT_ELEMENTS(pVM->pgm.s.aHandyPages)));
4216 int rcAlloc = VINF_SUCCESS;
4217 int rcSeed = VINF_SUCCESS;
4218 int rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
4219 while (rc == VERR_GMM_SEED_ME)
4220 {
4221 void *pvChunk;
4222 rcAlloc = rc = SUPR3PageAlloc(GMM_CHUNK_SIZE >> PAGE_SHIFT, &pvChunk);
4223 if (RT_SUCCESS(rc))
4224 {
4225 rcSeed = rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_SEED_CHUNK, (uintptr_t)pvChunk, NULL);
4226 if (RT_FAILURE(rc))
4227 SUPR3PageFree(pvChunk, GMM_CHUNK_SIZE >> PAGE_SHIFT);
4228 }
4229 if (RT_SUCCESS(rc))
4230 rc = VMMR3CallR0(pVM, VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES, 0, NULL);
4231 }
4232
4233 /* todo: we should split this up into an allocate and flush operation. sometimes you want to flush and not allocate more (which will trigger the vm account limit error) */
4234 if ( rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT
4235 && pVM->pgm.s.cHandyPages > 0)
4236 {
4237 /* Still handy pages left, so don't panic. */
4238 rc = VINF_SUCCESS;
4239 }
4240
4241 if (RT_SUCCESS(rc))
4242 {
4243 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
4244 Assert(pVM->pgm.s.cHandyPages > 0);
4245 VM_FF_CLEAR(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
4246 VM_FF_CLEAR(pVM, VM_FF_PGM_NO_MEMORY);
4247
4248 /*
4249 * Clear the pages.
4250 */
4251 while (iClear < pVM->pgm.s.cHandyPages)
4252 {
4253 PGMMPAGEDESC pPage = &pVM->pgm.s.aHandyPages[iClear];
4254 void *pv;
4255 rc = pgmPhysPageMapByPageID(pVM, pPage->idPage, pPage->HCPhysGCPhys, &pv);
4256 AssertLogRelMsgBreak(RT_SUCCESS(rc), ("idPage=%#x HCPhysGCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc));
4257 ASMMemZeroPage(pv);
4258 iClear++;
4259 Log3(("PGMR3PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
4260 }
4261 }
4262 else
4263 {
4264 uint64_t cAllocPages, cMaxPages, cBalloonPages;
4265
4266 /*
4267 * We should never get here unless there is a genuine shortage of
4268 * memory (or some internal error). Flag the error so the VM can be
4269 * suspended ASAP and the user informed. If we're totally out of
4270 * handy pages we will return failure.
4271 */
4272 /* Report the failure. */
4273 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc rcAlloc=%Rrc rcSeed=%Rrc cHandyPages=%#x\n"
4274 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
4275 rc, rcAlloc, rcSeed,
4276 pVM->pgm.s.cHandyPages,
4277 pVM->pgm.s.cAllPages,
4278 pVM->pgm.s.cPrivatePages,
4279 pVM->pgm.s.cSharedPages,
4280 pVM->pgm.s.cZeroPages));
4281
4282 if (GMMR3QueryMemoryStats(pVM, &cAllocPages, &cMaxPages, &cBalloonPages) == VINF_SUCCESS)
4283 {
4284 LogRel(("GMM: Statistics:\n"
4285 " Allocated pages: %RX64\n"
4286 " Maximum pages: %RX64\n"
4287 " Ballooned pages: %RX64\n", cAllocPages, cMaxPages, cBalloonPages));
4288 }
4289
4290 if ( rc != VERR_NO_MEMORY
4291 && rc != VERR_LOCK_FAILED)
4292 {
4293 for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
4294 {
4295 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
4296 i, pVM->pgm.s.aHandyPages[i].HCPhysGCPhys, pVM->pgm.s.aHandyPages[i].idPage,
4297 pVM->pgm.s.aHandyPages[i].idSharedPage));
4298 uint32_t const idPage = pVM->pgm.s.aHandyPages[i].idPage;
4299 if (idPage != NIL_GMM_PAGEID)
4300 {
4301 for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR3;
4302 pRam;
4303 pRam = pRam->pNextR3)
4304 {
4305 uint32_t const cPages = pRam->cb >> PAGE_SHIFT;
4306 for (uint32_t iPage = 0; iPage < cPages; iPage++)
4307 if (PGM_PAGE_GET_PAGEID(&pRam->aPages[iPage]) == idPage)
4308 LogRel(("PGM: Used by %RGp %R[pgmpage] (%s)\n",
4309 pRam->GCPhys + ((RTGCPHYS)iPage << PAGE_SHIFT), &pRam->aPages[iPage], pRam->pszDesc));
4310 }
4311 }
4312 }
4313 }
4314
4315 /* Set the FFs and adjust rc. */
4316 VM_FF_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES);
4317 VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
4318 if ( rc == VERR_NO_MEMORY
4319 || rc == VERR_LOCK_FAILED)
4320 rc = VINF_EM_NO_MEMORY;
4321 }
4322
4323 pgmUnlock(pVM);
4324 return rc;
4325}
4326
4327
4328/**
4329 * Frees the specified RAM page and replaces it with the ZERO page.
4330 *
4331 * This is used by ballooning, remapping MMIO2, RAM reset and state loading.
4332 *
4333 * @param pVM Pointer to the shared VM structure.
4334 * @param pReq Pointer to the request.
4335 * @param pcPendingPages Where the number of pages waiting to be freed is
4336 * kept. This will normally be incremented.
4337 * @param pPage Pointer to the page structure.
4338 * @param GCPhys The guest physical address of the page, if applicable.
4339 *
4340 * @remarks The caller must own the PGM lock.
4341 */
4342int pgmPhysFreePage(PVM pVM, PGMMFREEPAGESREQ pReq, uint32_t *pcPendingPages, PPGMPAGE pPage, RTGCPHYS GCPhys)
4343{
4344 /*
4345 * Assert sanity.
4346 */
4347 PGM_LOCK_ASSERT_OWNER(pVM);
4348 if (RT_UNLIKELY( PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_RAM
4349 && PGM_PAGE_GET_TYPE(pPage) != PGMPAGETYPE_ROM_SHADOW))
4350 {
4351 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
4352 return VMSetError(pVM, VERR_PGM_PHYS_NOT_RAM, RT_SRC_POS, "GCPhys=%RGp type=%d", GCPhys, PGM_PAGE_GET_TYPE(pPage));
4353 }
4354
4355 /** @todo What about ballooning of large pages??! */
4356 Assert( PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE
4357 && PGM_PAGE_GET_PDE_TYPE(pPage) != PGM_PAGE_PDE_TYPE_PDE_DISABLED);
4358
4359 if ( PGM_PAGE_IS_ZERO(pPage)
4360 || PGM_PAGE_IS_BALLOONED(pPage))
4361 return VINF_SUCCESS;
4362
4363 const uint32_t idPage = PGM_PAGE_GET_PAGEID(pPage);
4364 Log3(("pgmPhysFreePage: idPage=%#x GCPhys=%RGp pPage=%R[pgmpage]\n", idPage, GCPhys, pPage));
4365 if (RT_UNLIKELY( idPage == NIL_GMM_PAGEID
4366 || idPage > GMM_PAGEID_LAST
4367 || PGM_PAGE_GET_CHUNKID(pPage) == NIL_GMM_CHUNKID))
4368 {
4369 AssertMsgFailed(("GCPhys=%RGp pPage=%R[pgmpage]\n", GCPhys, pPage));
4370         return VMSetError(pVM, VERR_PGM_PHYS_INVALID_PAGE_ID, RT_SRC_POS, "GCPhys=%RGp idPage=%#x", GCPhys, idPage);
4371 }
4372
4373 /* update page count stats. */
4374 if (PGM_PAGE_IS_SHARED(pPage))
4375 pVM->pgm.s.cSharedPages--;
4376 else
4377 pVM->pgm.s.cPrivatePages--;
4378 pVM->pgm.s.cZeroPages++;
4379
4380 /* Deal with write monitored pages. */
4381 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED)
4382 {
4383 PGM_PAGE_SET_WRITTEN_TO(pVM, pPage);
4384 pVM->pgm.s.cWrittenToPages++;
4385 }
4386
4387 /*
4388 * pPage = ZERO page.
4389 */
4390 PGM_PAGE_SET_HCPHYS(pVM, pPage, pVM->pgm.s.HCPhysZeroPg);
4391 PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_ZERO);
4392 PGM_PAGE_SET_PAGEID(pVM, pPage, NIL_GMM_PAGEID);
4393 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_DONTCARE);
4394 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, 0);
4395 PGM_PAGE_SET_TRACKING(pVM, pPage, 0);
4396
4397 /* Flush physical page map TLB entry. */
4398 pgmPhysInvalidatePageMapTLBEntry(pVM, GCPhys);
4399
4400 /*
4401 * Make sure it's not in the handy page array.
4402 */
4403 for (uint32_t i = pVM->pgm.s.cHandyPages; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
4404 {
4405 if (pVM->pgm.s.aHandyPages[i].idPage == idPage)
4406 {
4407 pVM->pgm.s.aHandyPages[i].idPage = NIL_GMM_PAGEID;
4408 break;
4409 }
4410 if (pVM->pgm.s.aHandyPages[i].idSharedPage == idPage)
4411 {
4412 pVM->pgm.s.aHandyPages[i].idSharedPage = NIL_GMM_PAGEID;
4413 break;
4414 }
4415 }
4416
4417 /*
4418 * Push it onto the page array.
4419 */
4420 uint32_t iPage = *pcPendingPages;
4421 Assert(iPage < PGMPHYS_FREE_PAGE_BATCH_SIZE);
4422 *pcPendingPages += 1;
4423
4424 pReq->aPages[iPage].idPage = idPage;
4425
4426 if (iPage + 1 < PGMPHYS_FREE_PAGE_BATCH_SIZE)
4427 return VINF_SUCCESS;
4428
4429 /*
4430 * Flush the pages.
4431 */
4432 int rc = GMMR3FreePagesPerform(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE);
4433 if (RT_SUCCESS(rc))
4434 {
4435 GMMR3FreePagesRePrep(pVM, pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
4436 *pcPendingPages = 0;
4437 }
4438 return rc;
4439}
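
The pcPendingPages contract described above is easiest to see from a caller. The sketch below mirrors the batching pattern already used by pgmR3PhysRomReset earlier in this file (prepare a request, free pages in a loop, flush the final partial batch, clean up); the function name and the page range it walks are hypothetical, and the PGM lock is assumed to be held.

/* Illustrative caller only: frees a hypothetical run of RAM pages back to the
 * zero page using the same prepare/free/perform/cleanup batching as
 * pgmR3PhysRomReset.  Assumes the caller owns the PGM lock. */
static int exampleFreePageRange(PVM pVM, PPGMPAGE paPages, RTGCPHYS GCPhysFirst, uint32_t cPages)
{
    uint32_t         cPendingPages = 0;
    PGMMFREEPAGESREQ pReq;
    int rc = GMMR3FreePagesPrepare(pVM, &pReq, PGMPHYS_FREE_PAGE_BATCH_SIZE, GMMACCOUNT_BASE);
    AssertRCReturn(rc, rc);

    for (uint32_t iPage = 0; iPage < cPages; iPage++)
    {
        /* pgmPhysFreePage flushes a full batch itself and resets the counter. */
        rc = pgmPhysFreePage(pVM, pReq, &cPendingPages, &paPages[iPage],
                             GCPhysFirst + ((RTGCPHYS)iPage << PAGE_SHIFT));
        AssertLogRelRCReturn(rc, rc);
    }

    /* Flush whatever is left in the final, partial batch. */
    if (cPendingPages)
    {
        rc = GMMR3FreePagesPerform(pVM, pReq, cPendingPages);
        AssertLogRelRCReturn(rc, rc);
    }
    GMMR3FreePagesCleanup(pReq);
    return VINF_SUCCESS;
}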
4440
4441
4442/**
4443 * Converts a GC physical address to a HC ring-3 pointer, with some
4444 * additional checks.
4445 *
4446 * @returns VBox status code.
4447 * @retval VINF_SUCCESS on success.
4448 * @retval VINF_PGM_PHYS_TLB_CATCH_WRITE and *ppv set if the page has a write
4449 * access handler of some kind.
4450 * @retval VERR_PGM_PHYS_TLB_CATCH_ALL if the page has a handler catching all
4451 * accesses or is odd in any way.
4452 * @retval VERR_PGM_PHYS_TLB_UNASSIGNED if the page doesn't exist.
4453 *
4454 * @param pVM The VM handle.
4455 * @param GCPhys The GC physical address to convert.
4456 * @param fWritable Whether write access is required.
4457 * @param ppv Where to store the pointer corresponding to GCPhys on
4458 * success.
4459 */
4460VMMR3DECL(int) PGMR3PhysTlbGCPhys2Ptr(PVM pVM, RTGCPHYS GCPhys, bool fWritable, void **ppv)
4461{
4462 pgmLock(pVM);
4463
4464 PPGMRAMRANGE pRam;
4465 PPGMPAGE pPage;
4466 int rc = pgmPhysGetPageAndRangeEx(pVM, GCPhys, &pPage, &pRam);
4467 if (RT_SUCCESS(rc))
4468 {
4469 if (PGM_PAGE_IS_BALLOONED(pPage))
4470 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4471 else if (!PGM_PAGE_HAS_ANY_HANDLERS(pPage))
4472 rc = VINF_SUCCESS;
4473 else
4474 {
4475 if (PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)) /* catches MMIO */
4476 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
4477 else if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
4478 {
4479 /** @todo Handle TLB loads of virtual handlers so ./test.sh can be made to work
4480 * in -norawr0 mode. */
4481 if (fWritable)
4482 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4483 }
4484 else
4485 {
4486             /* Temporarily disabled physical handler(s): since the recompiler
4487                doesn't get notified when the handler is reset, we'll have to
4488                pretend it's operating normally. */
4489 if (pgmHandlerPhysicalIsAll(pVM, GCPhys))
4490 rc = VERR_PGM_PHYS_TLB_CATCH_ALL;
4491 else
4492 rc = VINF_PGM_PHYS_TLB_CATCH_WRITE;
4493 }
4494 }
4495 if (RT_SUCCESS(rc))
4496 {
4497 int rc2;
4498
4499 /* Make sure what we return is writable. */
4500 if (fWritable)
4501 switch (PGM_PAGE_GET_STATE(pPage))
4502 {
4503 case PGM_PAGE_STATE_ALLOCATED:
4504 break;
4505 case PGM_PAGE_STATE_BALLOONED:
4506 AssertFailed();
4507 break;
4508 case PGM_PAGE_STATE_ZERO:
4509 case PGM_PAGE_STATE_SHARED:
4510 if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
4511 break;
4512 case PGM_PAGE_STATE_WRITE_MONITORED:
4513 rc2 = pgmPhysPageMakeWritable(pVM, pPage, GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
4514 AssertLogRelRCReturn(rc2, rc2);
4515 break;
4516 }
4517
4518 /* Get a ring-3 mapping of the address. */
4519 PPGMPAGER3MAPTLBE pTlbe;
4520 rc2 = pgmPhysPageQueryTlbe(pVM, GCPhys, &pTlbe);
4521 AssertLogRelRCReturn(rc2, rc2);
4522 *ppv = (void *)((uintptr_t)pTlbe->pv | (uintptr_t)(GCPhys & PAGE_OFFSET_MASK));
4523 /** @todo mapping/locking hell; this isn't horribly efficient since
4524 * pgmPhysPageLoadIntoTlb will repeat the lookup we've done here. */
4525
4526 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage] *ppv=%p\n", GCPhys, rc, pPage, *ppv));
4527 }
4528 else
4529 Log6(("PGMR3PhysTlbGCPhys2Ptr: GCPhys=%RGp rc=%Rrc pPage=%R[pgmpage]\n", GCPhys, rc, pPage));
4530
4531 /* else: handler catching all access, no pointer returned. */
4532 }
4533 else
4534 rc = VERR_PGM_PHYS_TLB_UNASSIGNED;
4535
4536 pgmUnlock(pVM);
4537 return rc;
4538}
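
A hedged sketch of a caller consuming the status codes documented above: VINF_SUCCESS means the returned pointer can be written directly, VINF_PGM_PHYS_TLB_CATCH_WRITE means the write must go through a handler-aware access path, and the error codes leave *ppv unusable. The caller and its fallback helper are hypothetical names, not part of this file.

/* Hypothetical fallback that performs a handler-aware write, e.g. by going
 * through the regular PGMPhysWrite path.  Declared here only for the sketch. */
static int exampleHandlerAwareWrite(PVM pVM, RTGCPHYS GCPhys, uint8_t bValue);

/* Illustrative caller: try to obtain a direct ring-3 pointer for a one-byte
 * write and fall back when a handler must be allowed to fire. */
static int exampleWriteGuestByte(PVM pVM, RTGCPHYS GCPhys, uint8_t bValue)
{
    void *pv;
    int rc = PGMR3PhysTlbGCPhys2Ptr(pVM, GCPhys, true /*fWritable*/, &pv);
    if (rc == VINF_SUCCESS)
    {
        /* Ordinary RAM: write straight through the returned pointer. */
        *(uint8_t *)pv = bValue;
        return VINF_SUCCESS;
    }
    if (rc == VINF_PGM_PHYS_TLB_CATCH_WRITE)
        /* A write handler is registered; let it fire via the slow path. */
        return exampleHandlerAwareWrite(pVM, GCPhys, bValue);
    /* VERR_PGM_PHYS_TLB_CATCH_ALL, VERR_PGM_PHYS_TLB_UNASSIGNED, ... */
    return rc;
}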
4539