GMMR0.cpp@ 37803

Last change on this file since 37803 was 37803, checked in by vboxsync, 14 years ago
GMM: Don't ignore the status code of the update loop. PGM: Initialize the handy pages correctly and retry the handy page allocation a little differently.
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 175.8 KB

Line
1	/* $Id: GMMR0.cpp 37803 2011-07-06 14:45:27Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007-2011 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structures must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given an unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages need to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-7 free pages, the second covers 8-15, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in an out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/rawpci.h>
154	#include <VBox/vmm/vm.h>
155	#include <VBox/vmm/gmm.h>
156	#include "GMMR0Internal.h"
157	#include <VBox/vmm/gvm.h>
158	#include <VBox/vmm/pgm.h>
159	#include <VBox/log.h>
160	#include <VBox/param.h>
161	#include <VBox/err.h>
162	#include <iprt/asm.h>
163	#include <iprt/avl.h>
164	#include <iprt/list.h>
165	#include <iprt/mem.h>
166	#include <iprt/memobj.h>
167	#include <iprt/mp.h>
168	#include <iprt/semaphore.h>
169	#include <iprt/string.h>
170	#include <iprt/time.h>
171
172
173	/*******************************************************************************
174	* Structures and Typedefs *
175	*******************************************************************************/
/** Pointer to a set of free chunks (forward declaration; the structure
 *  itself is not defined in this part of the file). */
typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
178
179	/**
180	* The per-page tracking structure employed by the GMM.
181	*
182	* On 32-bit hosts we'll some trickery is necessary to compress all
183	* the information into 32-bits. When the fSharedFree member is set,
184	* the 30th bit decides whether it's a free page or not.
185	*
186	* Because of the different layout on 32-bit and 64-bit hosts, macros
187	* are used to get and set some of the data.
188	*/
189	typedef union GMMPAGE
190	{
191	#if HC_ARCH_BITS == 64
192	/** Unsigned integer view. */
193	uint64_t u;
194
195	/** The common view. */
196	struct GMMPAGECOMMON
197	{
198	uint32_t uStuff1 : 32;
199	uint32_t uStuff2 : 30;
200	/** The page state. */
201	uint32_t u2State : 2;
202	} Common;
203
204	/** The view of a private page. */
205	struct GMMPAGEPRIVATE
206	{
207	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
208	uint32_t pfn;
209	/** The GVM handle. (64K VMs) */
210	uint32_t hGVM : 16;
211	/** Reserved. */
212	uint32_t u16Reserved : 14;
213	/** The page state. */
214	uint32_t u2State : 2;
215	} Private;
216
217	/** The view of a shared page. */
218	struct GMMPAGESHARED
219	{
220	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
221	uint32_t pfn;
222	/** The reference count (64K VMs). */
223	uint32_t cRefs : 16;
224	/** Reserved. Checksum or something? Two hGVMs for forking? */
225	uint32_t u14Reserved : 14;
226	/** The page state. */
227	uint32_t u2State : 2;
228	} Shared;
229
230	/** The view of a free page. */
231	struct GMMPAGEFREE
232	{
233	/** The index of the next page in the free list. UINT16_MAX is NIL. */
234	uint16_t iNext;
235	/** Reserved. Checksum or something? */
236	uint16_t u16Reserved0;
237	/** Reserved. Checksum or something? */
238	uint32_t u30Reserved1 : 30;
239	/** The page state. */
240	uint32_t u2State : 2;
241	} Free;
242
243	#else /* 32-bit */
244	/** Unsigned integer view. */
245	uint32_t u;
246
247	/** The common view. */
248	struct GMMPAGECOMMON
249	{
250	uint32_t uStuff : 30;
251	/** The page state. */
252	uint32_t u2State : 2;
253	} Common;
254
255	/** The view of a private page. */
256	struct GMMPAGEPRIVATE
257	{
258	/** The guest page frame number. (Max addressable: 2 ^ 36) */
259	uint32_t pfn : 24;
260	/** The GVM handle. (127 VMs) */
261	uint32_t hGVM : 7;
262	/** The top page state bit, MBZ. */
263	uint32_t fZero : 1;
264	} Private;
265
266	/** The view of a shared page. */
267	struct GMMPAGESHARED
268	{
269	/** The reference count. */
270	uint32_t cRefs : 30;
271	/** The page state. */
272	uint32_t u2State : 2;
273	} Shared;
274
275	/** The view of a free page. */
276	struct GMMPAGEFREE
277	{
278	/** The index of the next page in the free list. UINT16_MAX is NIL. */
279	uint32_t iNext : 16;
280	/** Reserved. Checksum or something? */
281	uint32_t u14Reserved : 14;
282	/** The page state. */
283	uint32_t u2State : 2;
284	} Free;
285	#endif
286	} GMMPAGE;
287	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
288	/** Pointer to a GMMPAGE. */
289	typedef GMMPAGE *PGMMPAGE;
290
291
/** @name The Page States.
 * Values stored in the two-bit u2State member of the GMMPAGE views.
 * @{ */
/** The page is private. */
#define GMM_PAGE_STATE_PRIVATE          0
/** The page is private - alternative value used on the 32-bit implementation.
 * This will never be used on 64-bit hosts. */
#define GMM_PAGE_STATE_PRIVATE_32       1
/** The page is shared. */
#define GMM_PAGE_STATE_SHARED           2
/** The page is free. */
#define GMM_PAGE_STATE_FREE             3
/** @} */
304
305
/** @def GMM_PAGE_IS_PRIVATE
 * Tests whether a page is in the private state.
 *
 * 64-bit hosts compare the common state member with GMM_PAGE_STATE_PRIVATE;
 * 32-bit hosts check that the top bit of the private view (fZero) is clear.
 *
 * @returns true if private, false if not.
 * @param   pPage       The GMM page.
 */
#if HC_ARCH_BITS == 64
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
#else
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
#endif

/** @def GMM_PAGE_IS_SHARED
 * Tests whether a page is in the shared state.
 *
 * @returns true if shared, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_SHARED(pPage)   ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )

/** @def GMM_PAGE_IS_FREE
 * Tests whether a page is in the free state.
 *
 * @returns true if free, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_FREE(pPage)     ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
330
331	/** @def GMM_PAGE_PFN_LAST
332	* The last valid guest pfn range.
333	* @remark Some of the values outside the range has special meaning,
334	* see GMM_PAGE_PFN_UNSHAREABLE.
335	*/
336	#if HC_ARCH_BITS == 64
337	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
338	#else
339	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
340	#endif
341	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
342
343	/** @def GMM_PAGE_PFN_UNSHAREABLE
344	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
345	*/
346	#if HC_ARCH_BITS == 64
347	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
348	#else
349	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
350	#endif
351	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
352
353
354	/**
355	* A GMM allocation chunk ring-3 mapping record.
356	*
357	* This should really be associated with a session and not a VM, but
358	* it's simpler to associated with a VM and cleanup with the VM object
359	* is destroyed.
360	*/
361	typedef struct GMMCHUNKMAP
362	{
363	/** The mapping object. */
364	RTR0MEMOBJ hMapObj;
365	/** The VM owning the mapping. */
366	PGVM pGVM;
367	} GMMCHUNKMAP;
368	/** Pointer to a GMM allocation chunk mapping. */
369	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
370
371
372	/**
373	* A GMM allocation chunk.
374	*/
375	typedef struct GMMCHUNK
376	{
377	/** The AVL node core.
378	* The Key is the chunk ID. (Giant mtx.) */
379	AVLU32NODECORE Core;
380	/** The memory object.
381	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
382	* what the host can dish up with. (Chunk mtx protects mapping accesses
383	* and related frees.) */
384	RTR0MEMOBJ hMemObj;
385	/** Pointer to the next chunk in the free list. (Giant mtx.) */
386	PGMMCHUNK pFreeNext;
387	/** Pointer to the previous chunk in the free list. (Giant mtx.) */
388	PGMMCHUNK pFreePrev;
389	/** Pointer to the free set this chunk belongs to. NULL for
390	* chunks with no free pages. (Giant mtx.) */
391	PGMMCHUNKFREESET pSet;
392	/** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
393	RTLISTNODE ListNode;
394	/** Pointer to an array of mappings. (Chunk mtx.) */
395	PGMMCHUNKMAP paMappingsX;
396	/** The number of mappings. (Chunk mtx.) */
397	uint16_t cMappingsX;
398	/** The mapping lock this chunk is using using. UINT16_MAX if nobody is
399	* mapping or freeing anything. (Giant mtx.) */
400	uint8_t volatile iChunkMtx;
401	/** Flags field reserved for future use (like eliminating enmType).
402	* (Giant mtx.) */
403	uint8_t fFlags;
404	/** The head of the list of free pages. UINT16_MAX is the NIL value.
405	* (Giant mtx.) */
406	uint16_t iFreeHead;
407	/** The number of free pages. (Giant mtx.) */
408	uint16_t cFree;
409	/** The GVM handle of the VM that first allocated pages from this chunk, this
410	* is used as a preference when there are several chunks to choose from.
411	* When in bound memory mode this isn't a preference any longer. (Giant
412	* mtx.) */
413	uint16_t hGVM;
414	/** The ID of the NUMA node the memory mostly resides on. (Reserved for
415	* future use.) (Giant mtx.) */
416	uint16_t idNumaNode;
417	/** The number of private pages. (Giant mtx.) */
418	uint16_t cPrivate;
419	/** The number of shared pages. (Giant mtx.) */
420	uint16_t cShared;
421	/** The pages. (Giant mtx.) */
422	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
423	} GMMCHUNK;
424
/** NUMA node ID value meaning that the NUMA properties of the memory are unknown. */
#define GMM_CHUNK_NUMA_ID_UNKNOWN       UINT16_C(0xfffe)

/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
 * @{ */
/** The chunk is a large page (2MB). */
#define GMM_CHUNK_FLAGS_LARGE_PAGE      UINT16_C(0x0001)
/** @} */
433
434
435	/**
436	* An allocation chunk TLB entry.
437	*/
438	typedef struct GMMCHUNKTLBE
439	{
440	/** The chunk id. */
441	uint32_t idChunk;
442	/** Pointer to the chunk. */
443	PGMMCHUNK pChunk;
444	} GMMCHUNKTLBE;
445	/** Pointer to an allocation chunk TLB entry. */
446	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
447
448
/** The number of entries in the allocation chunk TLB.  GMM_CHUNKTLB_IDX masks
 * with this value minus one. */
#define GMM_CHUNKTLB_ENTRIES            32
/** Maps a chunk ID to its TLB entry index (the low bits of the ID). */
#define GMM_CHUNKTLB_IDX(idChunk)       ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
453
454	/**
455	* An allocation chunk TLB.
456	*/
457	typedef struct GMMCHUNKTLB
458	{
459	/** The TLB entries. */
460	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
461	} GMMCHUNKTLB;
462	/** Pointer to an allocation chunk TLB. */
463	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
464
465
466	/**
467	* The GMM instance data.
468	*/
469	typedef struct GMM
470	{
471	/** Magic / eye catcher. GMM_MAGIC */
472	uint32_t u32Magic;
473	/** The number of threads waiting on the mutex. */
474	uint32_t cMtxContenders;
475	/** The fast mutex protecting the GMM.
476	* More fine grained locking can be implemented later if necessary. */
477	RTSEMFASTMUTEX hMtx;
478	#ifdef VBOX_STRICT
479	/** The current mutex owner. */
480	RTNATIVETHREAD hMtxOwner;
481	#endif
482	/** The chunk tree. */
483	PAVLU32NODECORE pChunks;
484	/** The chunk TLB. */
485	GMMCHUNKTLB ChunkTLB;
486	/** The private free set. */
487	GMMCHUNKFREESET PrivateX;
488	/** The shared free set. */
489	GMMCHUNKFREESET Shared;
490
491	/** Shared module tree (global). */
492	/** @todo separate trees for distinctly different guest OSes. */
493	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
494
495	/** The chunk list. For simplifying the cleanup process. */
496	RTLISTNODE ChunkList;
497
498	/** The maximum number of pages we're allowed to allocate.
499	* @gcfgm 64-bit GMM/MaxPages Direct.
500	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
501	uint64_t cMaxPages;
502	/** The number of pages that has been reserved.
503	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
504	uint64_t cReservedPages;
505	/** The number of pages that we have over-committed in reservations. */
506	uint64_t cOverCommittedPages;
507	/** The number of actually allocated (committed if you like) pages. */
508	uint64_t cAllocatedPages;
509	/** The number of pages that are shared. A subset of cAllocatedPages. */
510	uint64_t cSharedPages;
511	/** The number of pages that are actually shared between VMs. */
512	uint64_t cDuplicatePages;
513	/** The number of pages that are shared that has been left behind by
514	* VMs not doing proper cleanups. */
515	uint64_t cLeftBehindSharedPages;
516	/** The number of allocation chunks.
517	* (The number of pages we've allocated from the host can be derived from this.) */
518	uint32_t cChunks;
519	/** The number of current ballooned pages. */
520	uint64_t cBalloonedPages;
521
522	/** The legacy allocation mode indicator.
523	* This is determined at initialization time. */
524	bool fLegacyAllocationMode;
525	/** The bound memory mode indicator.
526	* When set, the memory will be bound to a specific VM and never
527	* shared. This is always set if fLegacyAllocationMode is set.
528	* (Also determined at initialization time.) */
529	bool fBoundMemoryMode;
530	/** The number of registered VMs. */
531	uint16_t cRegisteredVMs;
532
533	/** The number of freed chunks ever. This is used a list generation to
534	* avoid restarting the cleanup scanning when the list wasn't modified. */
535	uint32_t cFreedChunks;
536	/** The previous allocated Chunk ID.
537	* Used as a hint to avoid scanning the whole bitmap. */
538	uint32_t idChunkPrev;
539	/** Chunk ID allocation bitmap.
540	* Bits of allocated IDs are set, free ones are clear.
541	* The NIL id (0) is marked allocated. */
542	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
543
544	/** The index of the next mutex to use. */
545	uint32_t iNextChunkMtx;
546	/** Chunk locks for reducing lock contention without having to allocate
547	* one lock per chunk. */
548	struct
549	{
550	/** The mutex */
551	RTSEMFASTMUTEX hMtx;
552	/** The number of threads currently using this mutex. */
553	uint32_t volatile cUsers;
554	} aChunkMtx[64];
555	} GMM;
556	/** Pointer to the GMM instance. */
557	typedef GMM *PGMM;
558
/** Magic value stored in GMM::u32Magic while the instance is valid
 * (Katsuhiro Otomo). */
#define GMM_MAGIC       UINT32_C(0x19540414)
561
562
563	/**
564	* GMM chunk mutex state.
565	*
566	* This is returned by gmmR0ChunkMutexAcquire and is used by the other
567	* gmmR0ChunkMutex* methods.
568	*/
569	typedef struct GMMR0CHUNKMTXSTATE
570	{
571	PGMM pGMM;
572	/** The index of the chunk mutex. */
573	uint8_t iChunkMtx;
574	/** The relevant flags (GMMR0CHUNK_MTX_XXX). */
575	uint8_t fFlags;
576	} GMMR0CHUNKMTXSTATE;
577	/** Pointer to a chunk mutex state. */
578	typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
579
/** @name GMMR0CHUNK_MTX_XXX
 * Giant lock handling flags for chunk mutex acquisition.  Valid values lie
 * strictly between GMMR0CHUNK_MTX_INVALID and GMMR0CHUNK_MTX_END.
 * @{ */
#define GMMR0CHUNK_MTX_INVALID          UINT32_C(0)
#define GMMR0CHUNK_MTX_KEEP_GIANT       UINT32_C(1)
#define GMMR0CHUNK_MTX_RETAKE_GIANT     UINT32_C(2)
#define GMMR0CHUNK_MTX_DROP_GIANT       UINT32_C(3)
#define GMMR0CHUNK_MTX_END              UINT32_C(4)
/** @} */
588
589
590	/*******************************************************************************
591	* Global Variables *
592	*******************************************************************************/
593	/** Pointer to the GMM instance data. */
594	static PGMM g_pGMM = NULL;
595
/** Fetches g_pGMM into the given variable and validates it.
 *
 * The pointer must be valid and the instance must carry GMM_MAGIC; otherwise
 * an assertion is raised and the invoking function returns @a rc.
 *
 * @param   pGMM    The name of the pGMM variable.
 * @param   rc      The return value on failure.  Use VERR_INTERNAL_ERROR for
 *                  VBox status codes.
 */
#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
    do \
    { \
        (pGMM) = g_pGMM; \
        AssertPtrReturn((pGMM), (rc)); \
        AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
    } while (0)
609
/** Fetches g_pGMM into the given variable and validates it, variant for
 * functions returning void.
 *
 * The pointer must be valid and the instance must carry GMM_MAGIC; otherwise
 * an assertion is raised and the invoking function returns.
 *
 * @param   pGMM    The name of the pGMM variable.
 */
#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
    do \
    { \
        (pGMM) = g_pGMM; \
        AssertPtrReturnVoid((pGMM)); \
        AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
    } while (0)
621
622
/** @def GMM_CHECK_SANITY_UPON_ENTERING
 * Sanity check of the GMM instance data before changes are made.
 *
 * Stubbed out (always true) by default; the real check has to be enabled
 * manually by editing the \#if below.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (true)
#endif

/** @def GMM_CHECK_SANITY_UPON_LEAVING
 * Sanity check of the GMM instance data after changes have been made.
 *
 * Stubbed out (always true) by default; the real check has to be enabled
 * manually by editing the \#if below.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (true)
#endif

/** @def GMM_CHECK_SANITY_IN_LOOPS
 * Sanity check of the GMM instance for use inside the allocation loops.
 *
 * Stubbed out (always true) by default; the real check has to be enabled
 * manually by editing the \#if below.
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (true)
#endif
664
665
666	/*******************************************************************************
667	* Internal Functions *
668	*******************************************************************************/
669	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
670	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
671	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
672	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
673	DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
674	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
675	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
676	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
677	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
678	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
679	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
680
681
682
683	/**
684	* Initializes the GMM component.
685	*
686	* This is called when the VMMR0.r0 module is loaded and protected by the
687	* loader semaphore.
688	*
689	* @returns VBox status code.
690	*/
691	GMMR0DECL(int) GMMR0Init(void)
692	{
693	LogFlow(("GMMInit:\n"));
694
695	/*
696	* Allocate the instance data and the locks.
697	*/
698	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
699	if (!pGMM)
700	return VERR_NO_MEMORY;
701
702	pGMM->u32Magic = GMM_MAGIC;
703	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
704	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
705	RTListInit(&pGMM->ChunkList);
706	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
707
708	int rc = RTSemFastMutexCreate(&pGMM->hMtx);
709	if (RT_SUCCESS(rc))
710	{
711	unsigned iMtx;
712	for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
713	{
714	rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
715	if (RT_FAILURE(rc))
716	break;
717	}
718	if (RT_SUCCESS(rc))
719	{
720	/*
721	* Check and see if RTR0MemObjAllocPhysNC works.
722	*/
723	#if 0 /* later, see #3170. */
724	RTR0MEMOBJ MemObj;
725	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
726	if (RT_SUCCESS(rc))
727	{
728	rc = RTR0MemObjFree(MemObj, true);
729	AssertRC(rc);
730	}
731	else if (rc == VERR_NOT_SUPPORTED)
732	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
733	else
734	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
735	#else
736	# if defined(RT_OS_WINDOWS) \|\| (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
737	pGMM->fLegacyAllocationMode = false;
738	# if ARCH_BITS == 32
739	/* Don't reuse possibly partial chunks because of the virtual
740	address space limitation. */
741	pGMM->fBoundMemoryMode = true;
742	# else
743	pGMM->fBoundMemoryMode = false;
744	# endif
745	# else
746	pGMM->fLegacyAllocationMode = true;
747	pGMM->fBoundMemoryMode = true;
748	# endif
749	#endif
750
751	/*
752	* Query system page count and guess a reasonable cMaxPages value.
753	*/
754	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
755
756	g_pGMM = pGMM;
757	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
758	return VINF_SUCCESS;
759	}
760
761	/*
762	* Bail out.
763	*/
764	while (iMtx-- > 0)
765	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
766	RTSemFastMutexDestroy(pGMM->hMtx);
767	}
768
769	pGMM->u32Magic = 0;
770	RTMemFree(pGMM);
771	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
772	return rc;
773	}
774
775
776	/**
777	* Terminates the GMM component.
778	*/
779	GMMR0DECL(void) GMMR0Term(void)
780	{
781	LogFlow(("GMMTerm:\n"));
782
783	/*
784	* Take care / be paranoid...
785	*/
786	PGMM pGMM = g_pGMM;
787	if (!VALID_PTR(pGMM))
788	return;
789	if (pGMM->u32Magic != GMM_MAGIC)
790	{
791	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
792	return;
793	}
794
795	/*
796	* Undo what init did and free all the resources we've acquired.
797	*/
798	/* Destroy the fundamentals. */
799	g_pGMM = NULL;
800	pGMM->u32Magic = ~GMM_MAGIC;
801	RTSemFastMutexDestroy(pGMM->hMtx);
802	pGMM->hMtx = NIL_RTSEMFASTMUTEX;
803
804	/* Free any chunks still hanging around. */
805	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
806
807	/* Destroy the chunk locks. */
808	for (unsigned iMtx = 0; iMtx++ < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
809	{
810	Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
811	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
812	pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
813	}
814
815	/* Finally the instance data itself. */
816	RTMemFree(pGMM);
817	LogFlow(("GMMTerm: done\n"));
818	}
819
820
821	/**
822	* RTAvlU32Destroy callback.
823	*
824	* @returns 0
825	* @param pNode The node to destroy.
826	* @param pvGMM The GMM handle.
827	*/
828	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
829	{
830	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
831
832	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
833	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
834	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
835
836	int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
837	if (RT_FAILURE(rc))
838	{
839	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
840	pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
841	AssertRC(rc);
842	}
843	pChunk->hMemObj = NIL_RTR0MEMOBJ;
844
845	RTMemFree(pChunk->paMappingsX);
846	pChunk->paMappingsX = NULL;
847
848	RTMemFree(pChunk);
849	NOREF(pvGMM);
850	return 0;
851	}
852
853
854	/**
855	* Initializes the per-VM data for the GMM.
856	*
857	* This is called from within the GVMM lock (from GVMMR0CreateVM)
858	* and should only initialize the data members so GMMR0CleanupVM
859	* can deal with them. We reserve no memory or anything here,
860	* that's done later in GMMR0InitVM.
861	*
862	* @param pGVM Pointer to the Global VM structure.
863	*/
864	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
865	{
866	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
867
868	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
869	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
870	pGVM->gmm.s.fMayAllocate = false;
871	}
872
873
874	/**
875	* Acquires the GMM giant lock.
876	*
877	* @returns Assert status code from RTSemFastMutexRequest.
878	* @param pGMM Pointer to the GMM instance.
879	*/
880	static int gmmR0MutexAcquire(PGMM pGMM)
881	{
882	ASMAtomicIncU32(&pGMM->cMtxContenders);
883	int rc = RTSemFastMutexRequest(pGMM->hMtx);
884	ASMAtomicDecU32(&pGMM->cMtxContenders);
885	AssertRC(rc);
886	#ifdef VBOX_STRICT
887	pGMM->hMtxOwner = RTThreadNativeSelf();
888	#endif
889	return rc;
890	}
891
892
893	/**
894	* Releases the GMM giant lock.
895	*
896	* @returns Assert status code from RTSemFastMutexRequest.
897	* @param pGMM Pointer to the GMM instance.
898	*/
899	static int gmmR0MutexRelease(PGMM pGMM)
900	{
901	#ifdef VBOX_STRICT
902	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
903	#endif
904	int rc = RTSemFastMutexRelease(pGMM->hMtx);
905	AssertRC(rc);
906	return rc;
907	}
908
909
910	/**
911	* Yields the GMM giant lock if there is contention and a certain minimum time
912	* has elapsed since we took it.
913	*
914	* @returns @c true if the mutex was yielded, @c false if not.
915	* @param pGMM Pointer to the GMM instance.
916	* @param puLockNanoTS Where the lock acquisition time stamp is kept
917	* (in/out).
918	*/
919	static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
920	{
921	/*
922	* If nobody is contending the mutex, don't bother checking the time.
923	*/
924	if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
925	return false;
926
927	/*
928	* Don't yield if we haven't executed for at least 2 milliseconds.
929	*/
930	uint64_t uNanoNow = RTTimeSystemNanoTS();
931	if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
932	return false;
933
934	/*
935	* Yield the mutex.
936	*/
937	#ifdef VBOX_STRICT
938	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
939	#endif
940	ASMAtomicIncU32(&pGMM->cMtxContenders);
941	int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
942
943	RTThreadYield();
944
945	int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
946	*puLockNanoTS = RTTimeSystemNanoTS();
947	ASMAtomicDecU32(&pGMM->cMtxContenders);
948	#ifdef VBOX_STRICT
949	pGMM->hMtxOwner = RTThreadNativeSelf();
950	#endif
951
952	return true;
953	}
954
955
956	/**
957	* Acquires a chunk lock.
958	*
959	* The caller must own the giant lock.
960	*
961	* @returns Assert status code from RTSemFastMutexRequest.
962	* @param pMtxState The chunk mutex state info. (Avoids
963	* passing the same flags and stuff around
964	* for subsequent release and drop-giant
965	* calls.)
966	* @param pGMM Pointer to the GMM instance.
967	* @param pChunk Pointer to the chunk.
968	* @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
969	*/
970	static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
971	{
972	Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
973	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
974
975	pMtxState->pGMM = pGMM;
976	pMtxState->fFlags = (uint8_t)fFlags;
977
978	/*
979	* Get the lock index and reference the lock.
980	*/
981	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
982	uint32_t iChunkMtx = pChunk->iChunkMtx;
983	if (iChunkMtx == UINT8_MAX)
984	{
985	iChunkMtx = pGMM->iNextChunkMtx++;
986	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
987
988	/* Try get an unused one... */
989	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
990	{
991	iChunkMtx = pGMM->iNextChunkMtx++;
992	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
993	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
994	{
995	iChunkMtx = pGMM->iNextChunkMtx++;
996	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
997	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
998	{
999	iChunkMtx = pGMM->iNextChunkMtx++;
1000	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1001	}
1002	}
1003	}
1004
1005	pChunk->iChunkMtx = iChunkMtx;
1006	}
1007	AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
1008	pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
1009	ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
1010
1011	/*
1012	* Drop the giant?
1013	*/
1014	if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
1015	{
1016	/** @todo GMM life cycle cleanup (we may race someone
1017	* destroying and cleaning up GMM)? */
1018	gmmR0MutexRelease(pGMM);
1019	}
1020
1021	/*
1022	* Take the chunk mutex.
1023	*/
1024	int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
1025	AssertRC(rc);
1026	return rc;
1027	}
1028
1029
1030	/**
1031	* Releases the GMM giant lock.
1032	*
1033	* @returns Assert status code from RTSemFastMutexRequest.
1034	* @param pGMM Pointer to the GMM instance.
1035	* @param pChunk Pointer to the chunk if it's still
1036	* alive, NULL if it isn't. This is used to deassociate
1037	* the chunk from the mutex on the way out so a new one
1038	* can be selected next time, thus avoiding contented
1039	* mutexes.
1040	*/
1041	static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
1042	{
1043	PGMM pGMM = pMtxState->pGMM;
1044
1045	/*
1046	* Release the chunk mutex and reacquire the giant if requested.
1047	*/
1048	int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
1049	AssertRC(rc);
1050	if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
1051	rc = gmmR0MutexAcquire(pGMM);
1052	else
1053	Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
1054
1055	/*
1056	* Drop the chunk mutex user reference and deassociate it from the chunk
1057	* when possible.
1058	*/
1059	if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
1060	&& pChunk
1061	&& RT_SUCCESS(rc) )
1062	{
1063	if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
1064	pChunk->iChunkMtx = UINT8_MAX;
1065	else
1066	{
1067	rc = gmmR0MutexAcquire(pGMM);
1068	if (RT_SUCCESS(rc))
1069	{
1070	if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
1071	pChunk->iChunkMtx = UINT8_MAX;
1072	rc = gmmR0MutexRelease(pGMM);
1073	}
1074	}
1075	}
1076
1077	pMtxState->pGMM = NULL;
1078	return rc;
1079	}
1080
1081
1082	/**
1083	* Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
1084	* chunk locked.
1085	*
1086	* This only works if gmmR0ChunkMutexAcquire was called with
1087	* GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
1088	* mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
1089	*
1090	* @returns VBox status code (assuming success is ok).
1091	* @param pMtxState Pointer to the chunk mutex state.
1092	*/
1093	static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
1094	{
1095	AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_INTERNAL_ERROR_2);
1096	Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
1097	pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
1098	/** @todo GMM life cycle cleanup (we may race someone
1099	* destroying and cleaning up GMM)? */
1100	return gmmR0MutexRelease(pMtxState->pGMM);
1101	}
1102
1103
1104	/**
1105	* For experimenting with NUMA affinity and such.
1106	*
1107	* @returns The current NUMA Node ID.
1108	*/
1109	static uint16_t gmmR0GetCurrentNumaNodeId(void)
1110	{
1111	#if 1
1112	return GMM_CHUNK_NUMA_ID_UNKNOWN;
1113	#else
1114	return RTMpCpuId() / 16;
1115	#endif
1116	}
1117
1118
1119
1120	/**
1121	* Cleans up when a VM is terminating.
1122	*
1123	* @param pGVM Pointer to the Global VM structure.
1124	*/
1125	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
1126	{
1127	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
1128
1129	PGMM pGMM;
1130	GMM_GET_VALID_INSTANCE_VOID(pGMM);
1131
1132	#ifdef VBOX_WITH_PAGE_SHARING
1133	/*
1134	* Clean up all registered shared modules first.
1135	*/
1136	gmmR0SharedModuleCleanup(pGMM, pGVM);
1137	#endif
1138
1139	gmmR0MutexAcquire(pGMM);
1140	uint64_t uLockNanoTS = RTTimeSystemNanoTS();
1141	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
1142
1143	/*
1144	* The policy is 'INVALID' until the initial reservation
1145	* request has been serviced.
1146	*/
1147	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
1148	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
1149	{
1150	/*
1151	* If it's the last VM around, we can skip walking all the chunk looking
1152	* for the pages owned by this VM and instead flush the whole shebang.
1153	*
1154	* This takes care of the eventuality that a VM has left shared page
1155	* references behind (shouldn't happen of course, but you never know).
1156	*/
1157	Assert(pGMM->cRegisteredVMs);
1158	pGMM->cRegisteredVMs--;
1159
1160	/*
1161	* Walk the entire pool looking for pages that belong to this VM
1162	* and leftover mappings. (This'll only catch private pages,
1163	* shared pages will be 'left behind'.)
1164	*/
1165	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
1166
1167	unsigned iCountDown = 64;
1168	bool fRedoFromStart;
1169	PGMMCHUNK pChunk;
1170	do
1171	{
1172	fRedoFromStart = false;
1173	RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
1174	{
1175	uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
1176	if (gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
1177	{
1178	/* We left the giant mutex, so reset the yield counters. */
1179	uLockNanoTS = RTTimeSystemNanoTS();
1180	iCountDown = 64;
1181	}
1182	else
1183	{
1184	/* Didn't leave it, so do normal yielding. */
1185	if (!iCountDown)
1186	gmmR0MutexYield(pGMM, &uLockNanoTS);
1187	else
1188	iCountDown--;
1189	}
1190	if (pGMM->cFreedChunks != cFreeChunksOld)
1191	break;
1192	}
1193	} while (fRedoFromStart);
1194
1195	if (pGVM->gmm.s.cPrivatePages)
1196	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
1197
1198	pGMM->cAllocatedPages -= cPrivatePages;
1199
1200	/*
1201	* Free empty chunks.
1202	*/
1203	PGMMCHUNKFREESET pPrivateSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
1204	do
1205	{
1206	fRedoFromStart = false;
1207	iCountDown = 10240;
1208	pChunk = pPrivateSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
1209	while (pChunk)
1210	{
1211	PGMMCHUNK pNext = pChunk->pFreeNext;
1212	Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
1213	if ( !pGMM->fBoundMemoryMode
1214	\|\| pChunk->hGVM == pGVM->hSelf)
1215	{
1216	uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1217	if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /fRelaxedSem/))
1218	{
1219	/* We've left the giant mutex, restart? (+1 for our unlink) */
1220	fRedoFromStart = pPrivateSet->idGeneration != idGenerationOld + 1;
1221	if (fRedoFromStart)
1222	break;
1223	uLockNanoTS = RTTimeSystemNanoTS();
1224	iCountDown = 10240;
1225	}
1226	}
1227
1228	/* Advance and maybe yield the lock. */
1229	pChunk = pNext;
1230	if (--iCountDown == 0)
1231	{
1232	uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1233	fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
1234	&& pPrivateSet->idGeneration != idGenerationOld;
1235	if (fRedoFromStart)
1236	break;
1237	iCountDown = 10240;
1238	}
1239	}
1240	} while (fRedoFromStart);
1241
1242	/*
1243	* Account for shared pages that weren't freed.
1244	*/
1245	if (pGVM->gmm.s.cSharedPages)
1246	{
1247	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
1248	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
1249	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
1250	}
1251
1252	/*
1253	* Clean up balloon statistics in case the VM process crashed.
1254	*/
1255	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
1256	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
1257
1258	/*
1259	* Update the over-commitment management statistics.
1260	*/
1261	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1262	+ pGVM->gmm.s.Reserved.cFixedPages
1263	+ pGVM->gmm.s.Reserved.cShadowPages;
1264	switch (pGVM->gmm.s.enmPolicy)
1265	{
1266	case GMMOCPOLICY_NO_OC:
1267	break;
1268	default:
1269	/** @todo Update GMM->cOverCommittedPages */
1270	break;
1271	}
1272	}
1273
1274	/* zap the GVM data. */
1275	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
1276	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
1277	pGVM->gmm.s.fMayAllocate = false;
1278
1279	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1280	gmmR0MutexRelease(pGMM);
1281
1282	LogFlow(("GMMR0CleanupVM: returns\n"));
1283	}
1284
1285
1286	/**
1287	* Scan one chunk for private pages belonging to the specified VM.
1288	*
1289	* @note This function may drop the gian mutex!
1290	*
1291	* @returns @c true if we've temporarily dropped the giant mutex, @c false if
1292	* we didn't.
1293	* @param pGMM Pointer to the GMM instance.
1294	* @param pGVM The global VM handle.
1295	* @param pChunk The chunk to scan.
1296	*/
1297	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1298	{
1299	/*
1300	* Look for pages belonging to the VM.
1301	* (Perform some internal checks while we're scanning.)
1302	*/
1303	#ifndef VBOX_STRICT
1304	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
1305	#endif
1306	{
1307	unsigned cPrivate = 0;
1308	unsigned cShared = 0;
1309	unsigned cFree = 0;
1310
1311	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
1312
1313	uint16_t hGVM = pGVM->hSelf;
1314	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
1315	while (iPage-- > 0)
1316	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
1317	{
1318	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
1319	{
1320	/*
1321	* Free the page.
1322	*
1323	* The reason for not using gmmR0FreePrivatePage here is that we
1324	* must not cause the chunk to be freed from under us - we're in
1325	* an AVL tree walk here.
1326	*/
1327	pChunk->aPages[iPage].u = 0;
1328	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
1329	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1330	pChunk->iFreeHead = iPage;
1331	pChunk->cPrivate--;
1332	pChunk->cFree++;
1333	pGVM->gmm.s.cPrivatePages--;
1334	cFree++;
1335	}
1336	else
1337	cPrivate++;
1338	}
1339	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
1340	cFree++;
1341	else
1342	cShared++;
1343
1344	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1345
1346	/*
1347	* Did it add up?
1348	*/
1349	if (RT_UNLIKELY( pChunk->cFree != cFree
1350	\|\| pChunk->cPrivate != cPrivate
1351	\|\| pChunk->cShared != cShared))
1352	{
1353	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1354	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1355	pChunk->cFree = cFree;
1356	pChunk->cPrivate = cPrivate;
1357	pChunk->cShared = cShared;
1358	}
1359	}
1360
1361	/*
1362	* If not in bound memory mode, we should reset the hGVM field
1363	* if it has our handle in it.
1364	*/
1365	if (pChunk->hGVM == pGVM->hSelf)
1366	{
1367	if (!g_pGMM->fBoundMemoryMode)
1368	pChunk->hGVM = NIL_GVM_HANDLE;
1369	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1370	{
1371	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1372	pChunk, pChunk->Core.Key, pChunk->cFree);
1373	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1374
1375	gmmR0UnlinkChunk(pChunk);
1376	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1377	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1378	}
1379	}
1380
1381	/*
1382	* Look for a mapping belonging to the terminating VM.
1383	*/
1384	GMMR0CHUNKMTXSTATE MtxState;
1385	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
1386	unsigned cMappings = pChunk->cMappingsX;
1387	for (unsigned i = 0; i < cMappings; i++)
1388	if (pChunk->paMappingsX[i].pGVM == pGVM)
1389	{
1390	gmmR0ChunkMutexDropGiant(&MtxState);
1391
1392	RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
1393
1394	cMappings--;
1395	if (i < cMappings)
1396	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
1397	pChunk->paMappingsX[cMappings].pGVM = NULL;
1398	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
1399	Assert(pChunk->cMappingsX - 1U == cMappings);
1400	pChunk->cMappingsX = cMappings;
1401
1402	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
1403	if (RT_FAILURE(rc))
1404	{
1405	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1406	pChunk, pChunk->Core.Key, i, hMemObj, rc);
1407	AssertRC(rc);
1408	}
1409
1410	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1411	return true;
1412	}
1413
1414	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1415	return false;
1416	}
1417
1418
1419	/**
1420	* The initial resource reservations.
1421	*
1422	* This will make memory reservations according to policy and priority. If there aren't
1423	* sufficient resources available to sustain the VM this function will fail and all
1424	* future allocations requests will fail as well.
1425	*
1426	* These are just the initial reservations made very very early during the VM creation
1427	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1428	* ring-3 init has completed.
1429	*
1430	* @returns VBox status code.
1431	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1432	* @retval VERR_GMM_
1433	*
1434	* @param pVM Pointer to the shared VM structure.
1435	* @param idCpu VCPU id
1436	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1437	* This does not include MMIO2 and similar.
1438	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1439	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1440	* hyper heap, MMIO2 and similar.
1441	* @param enmPolicy The OC policy to use on this VM.
1442	* @param enmPriority The priority in an out-of-memory situation.
1443	*
1444	* @thread The creator thread / EMT.
1445	*/
1446	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1447	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1448	{
1449	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1450	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1451
1452	/*
1453	* Validate, get basics and take the semaphore.
1454	*/
1455	PGMM pGMM;
1456	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1457	PGVM pGVM;
1458	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1459	if (RT_FAILURE(rc))
1460	return rc;
1461
1462	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1463	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1464	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1465	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1466	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1467
1468	gmmR0MutexAcquire(pGMM);
1469	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1470	{
1471	if ( !pGVM->gmm.s.Reserved.cBasePages
1472	&& !pGVM->gmm.s.Reserved.cFixedPages
1473	&& !pGVM->gmm.s.Reserved.cShadowPages)
1474	{
1475	/*
1476	* Check if we can accommodate this.
1477	*/
1478	/* ... later ... */
1479	if (RT_SUCCESS(rc))
1480	{
1481	/*
1482	* Update the records.
1483	*/
1484	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1485	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1486	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1487	pGVM->gmm.s.enmPolicy = enmPolicy;
1488	pGVM->gmm.s.enmPriority = enmPriority;
1489	pGVM->gmm.s.fMayAllocate = true;
1490
1491	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1492	pGMM->cRegisteredVMs++;
1493	}
1494	}
1495	else
1496	rc = VERR_WRONG_ORDER;
1497	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1498	}
1499	else
1500	rc = VERR_INTERNAL_ERROR_5;
1501	gmmR0MutexRelease(pGMM);
1502	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1503	return rc;
1504	}
1505
1506
1507	/**
1508	* VMMR0 request wrapper for GMMR0InitialReservation.
1509	*
1510	* @returns see GMMR0InitialReservation.
1511	* @param pVM Pointer to the shared VM structure.
1512	* @param idCpu VCPU id
1513	* @param pReq The request packet.
1514	*/
1515	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1516	{
1517	/*
1518	* Validate input and pass it on.
1519	*/
1520	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1521	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1522	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1523
1524	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1525	}
1526
1527
1528	/**
1529	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1530	*
1531	* @returns VBox status code.
1532	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1533	*
1534	* @param pVM Pointer to the shared VM structure.
1535	* @param idCpu VCPU id
1536	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1537	* This does not include MMIO2 and similar.
1538	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1539	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1540	* hyper heap, MMIO2 and similar.
1541	*
1542	* @thread EMT.
1543	*/
1544	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1545	{
1546	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1547	pVM, cBasePages, cShadowPages, cFixedPages));
1548
1549	/*
1550	* Validate, get basics and take the semaphore.
1551	*/
1552	PGMM pGMM;
1553	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1554	PGVM pGVM;
1555	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1556	if (RT_FAILURE(rc))
1557	return rc;
1558
1559	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1560	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1561	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1562
1563	gmmR0MutexAcquire(pGMM);
1564	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1565	{
1566	if ( pGVM->gmm.s.Reserved.cBasePages
1567	&& pGVM->gmm.s.Reserved.cFixedPages
1568	&& pGVM->gmm.s.Reserved.cShadowPages)
1569	{
1570	/*
1571	* Check if we can accommodate this.
1572	*/
1573	/* ... later ... */
1574	if (RT_SUCCESS(rc))
1575	{
1576	/*
1577	* Update the records.
1578	*/
1579	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1580	+ pGVM->gmm.s.Reserved.cFixedPages
1581	+ pGVM->gmm.s.Reserved.cShadowPages;
1582	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1583
1584	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1585	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1586	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1587	}
1588	}
1589	else
1590	rc = VERR_WRONG_ORDER;
1591	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1592	}
1593	else
1594	rc = VERR_INTERNAL_ERROR_5;
1595	gmmR0MutexRelease(pGMM);
1596	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1597	return rc;
1598	}
1599
1600
1601	/**
1602	* VMMR0 request wrapper for GMMR0UpdateReservation.
1603	*
1604	* @returns see GMMR0UpdateReservation.
1605	* @param pVM Pointer to the shared VM structure.
1606	* @param idCpu VCPU id
1607	* @param pReq The request packet.
1608	*/
1609	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1610	{
1611	/*
1612	* Validate input and pass it on.
1613	*/
1614	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1615	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1616	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1617
1618	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1619	}
1620
1621
1622	/**
1623	* Performs sanity checks on a free set.
1624	*
1625	* @returns Error count.
1626	*
1627	* @param pGMM Pointer to the GMM instance.
1628	* @param pSet Pointer to the set.
1629	* @param pszSetName The set name.
1630	* @param pszFunction The function from which it was called.
1631	* @param uLine The line number.
1632	*/
1633	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1634	const char *pszFunction, unsigned uLineNo)
1635	{
1636	uint32_t cErrors = 0;
1637
1638	/*
1639	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1640	*/
1641	uint32_t cPages = 0;
1642	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1643	{
1644	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1645	{
1646	/** @todo check that the chunk is hash into the right set. */
1647	cPages += pCur->cFree;
1648	}
1649	}
1650	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1651	{
1652	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1653	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1654	cErrors++;
1655	}
1656
1657	return cErrors;
1658	}
1659
1660
1661	/**
1662	* Performs some sanity checks on the GMM while owning lock.
1663	*
1664	* @returns Error count.
1665	*
1666	* @param pGMM Pointer to the GMM instance.
1667	* @param pszFunction The function from which it is called.
1668	* @param uLineNo The line number.
1669	*/
1670	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1671	{
1672	uint32_t cErrors = 0;
1673
1674	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->PrivateX, "private", pszFunction, uLineNo);
1675	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1676	/** @todo add more sanity checks. */
1677
1678	return cErrors;
1679	}
1680
1681
1682	/**
1683	* Looks up a chunk in the tree and fill in the TLB entry for it.
1684	*
1685	* This is not expected to fail and will bitch if it does.
1686	*
1687	* @returns Pointer to the allocation chunk, NULL if not found.
1688	* @param pGMM Pointer to the GMM instance.
1689	* @param idChunk The ID of the chunk to find.
1690	* @param pTlbe Pointer to the TLB entry.
1691	*/
1692	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1693	{
1694	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1695	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1696	pTlbe->idChunk = idChunk;
1697	pTlbe->pChunk = pChunk;
1698	return pChunk;
1699	}
1700
1701
1702	/**
1703	* Finds a allocation chunk.
1704	*
1705	* This is not expected to fail and will bitch if it does.
1706	*
1707	* @returns Pointer to the allocation chunk, NULL if not found.
1708	* @param pGMM Pointer to the GMM instance.
1709	* @param idChunk The ID of the chunk to find.
1710	*/
1711	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1712	{
1713	/*
1714	* Do a TLB lookup, branch if not in the TLB.
1715	*/
1716	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1717	if ( pTlbe->idChunk != idChunk
1718	\|\| !pTlbe->pChunk)
1719	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1720	return pTlbe->pChunk;
1721	}
1722
1723
1724	/**
1725	* Finds a page.
1726	*
1727	* This is not expected to fail and will bitch if it does.
1728	*
1729	* @returns Pointer to the page, NULL if not found.
1730	* @param pGMM Pointer to the GMM instance.
1731	* @param idPage The ID of the page to find.
1732	*/
1733	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1734	{
1735	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1736	if (RT_LIKELY(pChunk))
1737	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1738	return NULL;
1739	}
1740
1741
1742	/**
1743	* Gets the host physical address for a page given by it's ID.
1744	*
1745	* @returns The host physical address or NIL_RTHCPHYS.
1746	* @param pGMM Pointer to the GMM instance.
1747	* @param idPage The ID of the page to find.
1748	*/
1749	DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1750	{
1751	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1752	if (RT_LIKELY(pChunk))
1753	return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
1754	return NIL_RTHCPHYS;
1755	}
1756
1757
1758	/**
1759	* Selects the appropriate free list given the number of free pages.
1760	*
1761	* @returns Free list index.
1762	* @param cFree The number of free pages in the chunk.
1763	*/
1764	DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
1765	{
1766	unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
1767	AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
1768	("%d (%u)\n", iList, cFree));
1769	return iList;
1770	}
1771
1772
1773	/**
1774	* Unlinks the chunk from the free list it's currently on (if any).
1775	*
1776	* @param pChunk The allocation chunk.
1777	*/
1778	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1779	{
1780	PGMMCHUNKFREESET pSet = pChunk->pSet;
1781	if (RT_LIKELY(pSet))
1782	{
1783	pSet->cFreePages -= pChunk->cFree;
1784	pSet->idGeneration++;
1785
1786	PGMMCHUNK pPrev = pChunk->pFreePrev;
1787	PGMMCHUNK pNext = pChunk->pFreeNext;
1788	if (pPrev)
1789	pPrev->pFreeNext = pNext;
1790	else
1791	pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
1792	if (pNext)
1793	pNext->pFreePrev = pPrev;
1794
1795	pChunk->pSet = NULL;
1796	pChunk->pFreeNext = NULL;
1797	pChunk->pFreePrev = NULL;
1798	}
1799	else
1800	{
1801	Assert(!pChunk->pFreeNext);
1802	Assert(!pChunk->pFreePrev);
1803	Assert(!pChunk->cFree);
1804	}
1805	}
1806
1807
1808	/**
1809	* Links the chunk onto the appropriate free list in the specified free set.
1810	*
1811	* If no free entries, it's not linked into any list.
1812	*
1813	* @param pChunk The allocation chunk.
1814	* @param pSet The free set.
1815	*/
1816	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1817	{
1818	Assert(!pChunk->pSet);
1819	Assert(!pChunk->pFreeNext);
1820	Assert(!pChunk->pFreePrev);
1821
1822	if (pChunk->cFree > 0)
1823	{
1824	pChunk->pSet = pSet;
1825	pChunk->pFreePrev = NULL;
1826	unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
1827	pChunk->pFreeNext = pSet->apLists[iList];
1828	if (pChunk->pFreeNext)
1829	pChunk->pFreeNext->pFreePrev = pChunk;
1830	pSet->apLists[iList] = pChunk;
1831
1832	pSet->cFreePages += pChunk->cFree;
1833	pSet->idGeneration++;
1834	}
1835	}
1836
1837
1838	/**
1839	* Links the chunk onto the appropriate free list in the specified free set.
1840	*
1841	* If no free entries, it's not linked into any list.
1842	*
1843	* @param pChunk The allocation chunk.
1844	*/
1845	DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1846	{
1847	PGMMCHUNKFREESET pSet;
1848	if (pGMM->fBoundMemoryMode)
1849	pSet = &pGVM->gmm.s.Private;
1850	else if (pChunk->cShared)
1851	pSet = &pGMM->Shared;
1852	else
1853	pSet = &pGMM->PrivateX;
1854	gmmR0LinkChunk(pChunk, pSet);
1855	}
1856
1857
1858	/**
1859	* Frees a Chunk ID.
1860	*
1861	* @param pGMM Pointer to the GMM instance.
1862	* @param idChunk The Chunk ID to free.
1863	*/
1864	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1865	{
1866	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1867	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1868	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1869	}
1870
1871
1872	/**
1873	* Allocates a new Chunk ID.
1874	*
1875	* @returns The Chunk ID.
1876	* @param pGMM Pointer to the GMM instance.
1877	*/
1878	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1879	{
1880	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1881	AssertCompile(NIL_GMM_CHUNKID == 0);
1882
1883	/*
1884	* Try the next sequential one.
1885	*/
1886	int32_t idChunk = ++pGMM->idChunkPrev;
1887	#if 0 /** @todo enable this code */
1888	if ( idChunk <= GMM_CHUNKID_LAST
1889	&& idChunk > NIL_GMM_CHUNKID
1890	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1891	return idChunk;
1892	#endif
1893
1894	/*
1895	* Scan sequentially from the last one.
1896	*/
1897	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1898	&& idChunk > NIL_GMM_CHUNKID)
1899	{
1900	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1901	if (idChunk > NIL_GMM_CHUNKID)
1902	{
1903	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1904	return pGMM->idChunkPrev = idChunk;
1905	}
1906	}
1907
1908	/*
1909	* Ok, scan from the start.
1910	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1911	*/
1912	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1913	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1914	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1915
1916	return pGMM->idChunkPrev = idChunk;
1917	}
1918
1919
1920	/**
1921	* Allocates one private page.
1922	*
1923	* Worker for gmmR0AllocatePages.
1924	*
1925	* @param pGMM Pointer to the GMM instance data.
1926	* @param hGVM The GVM handle of the VM requesting memory.
1927	* @param pChunk The chunk to allocate it from.
1928	* @param pPageDesc The page descriptor.
1929	*/
1930	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1931	{
1932	/* update the chunk stats. */
1933	if (pChunk->hGVM == NIL_GVM_HANDLE)
1934	pChunk->hGVM = hGVM;
1935	Assert(pChunk->cFree);
1936	pChunk->cFree--;
1937	pChunk->cPrivate++;
1938
1939	/* unlink the first free page. */
1940	const uint32_t iPage = pChunk->iFreeHead;
1941	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1942	PGMMPAGE pPage = &pChunk->aPages[iPage];
1943	Assert(GMM_PAGE_IS_FREE(pPage));
1944	pChunk->iFreeHead = pPage->Free.iNext;
1945	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1946	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
1947	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1948
1949	/* make the page private. */
1950	pPage->u = 0;
1951	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1952	pPage->Private.hGVM = hGVM;
1953	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1954	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1955	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1956	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1957	else
1958	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1959
1960	/* update the page descriptor. */
1961	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
1962	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1963	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
1964	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1965	}
1966
1967
1968	/**
1969	* Picks the free pages from a chunk.
1970	*
1971	* @returns The new page descriptor table index.
1972	* @param pGMM Pointer to the GMM instance data.
1973	* @param hGVM The VM handle.
1974	* @param pChunk The chunk.
1975	* @param iPage The current page descriptor table index.
1976	* @param cPages The total number of pages to allocate.
1977	* @param paPages The page descriptor table (input + ouput).
1978	*/
1979	static uint32_t gmmR0AllocatePagesFromChunk(PGMM pGMM, uint16_t const hGVM, PGMMCHUNK pChunk, uint32_t iPage, uint32_t cPages,
1980	PGMMPAGEDESC paPages)
1981	{
1982	PGMMCHUNKFREESET pSet = pChunk->pSet; Assert(pSet);
1983	gmmR0UnlinkChunk(pChunk);
1984
1985	for (; pChunk->cFree && iPage < cPages; iPage++)
1986	gmmR0AllocatePage(pGMM, hGVM, pChunk, &paPages[iPage]);
1987
1988	gmmR0LinkChunk(pChunk, pSet);
1989	return iPage;
1990	}
1991
1992
1993	/**
1994	* Registers a new chunk of memory.
1995	*
1996	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk.
1997	*
1998	* @returns VBox status code. On success, the giant GMM lock will be held, the
1999	* caller must release it (ugly).
2000	* @param pGMM Pointer to the GMM instance.
2001	* @param pSet Pointer to the set.
2002	* @param MemObj The memory object for the chunk.
2003	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
2004	* affinity.
2005	* @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
2006	* @param ppChunk Chunk address (out). Optional.
2007	*
2008	* @remarks The caller must not own the giant GMM mutex.
2009	* The giant GMM mutex will be acquired and returned acquired in
2010	* the success path. On failure, no locks will be held.
2011	*/
2012	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, uint16_t fChunkFlags,
2013	PGMMCHUNK *ppChunk)
2014	{
2015	Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
2016	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
2017	Assert(fChunkFlags == 0 \|\| fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
2018
2019	int rc;
2020	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
2021	if (pChunk)
2022	{
2023	/*
2024	* Initialize it.
2025	*/
2026	pChunk->hMemObj = MemObj;
2027	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
2028	pChunk->hGVM = hGVM;
2029	/pChunk->iFreeHead = 0;/
2030	pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
2031	pChunk->iChunkMtx = UINT8_MAX;
2032	pChunk->fFlags = fChunkFlags;
2033	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
2034	{
2035	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
2036	pChunk->aPages[iPage].Free.iNext = iPage + 1;
2037	}
2038	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
2039	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
2040
2041	/*
2042	* Allocate a Chunk ID and insert it into the tree.
2043	* This has to be done behind the mutex of course.
2044	*/
2045	rc = gmmR0MutexAcquire(pGMM);
2046	if (RT_SUCCESS(rc))
2047	{
2048	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2049	{
2050	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
2051	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
2052	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
2053	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
2054	{
2055	pGMM->cChunks++;
2056	RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
2057	gmmR0LinkChunk(pChunk, pSet);
2058	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
2059
2060	if (ppChunk)
2061	*ppChunk = pChunk;
2062	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2063	return VINF_SUCCESS;
2064	}
2065
2066	/* bail out */
2067	rc = VERR_INTERNAL_ERROR;
2068	}
2069	else
2070	rc = VERR_INTERNAL_ERROR_5;
2071	gmmR0MutexRelease(pGMM);
2072	}
2073
2074	RTMemFree(pChunk);
2075	}
2076	else
2077	rc = VERR_NO_MEMORY;
2078	return rc;
2079	}
2080
2081
2082	/**
2083	* Allocate a new chunk, immediately pick the requested pages from it, and adds
2084	* what's remaining to the specified free set.
2085	*
2086	* @note This will leave the giant mutex while allocating the new chunk!
2087	*
2088	* @returns VBox status code.
2089	* @param pGMM Pointer to the GMM instance data.
2090	* @param pGVM Pointer to the kernel-only VM instace data.
2091	* @param pSet Pointer to the free set.
2092	* @param cPages The number of pages requested.
2093	* @param paPages The page descriptor table (input + output).
2094	* @param piPage The pointer to the page descriptor table index
2095	* variable. This will be updated.
2096	*/
2097	static int gmmR0AllocateChunkNew(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
2098	PGMMPAGEDESC paPages, uint32_t *piPage)
2099	{
2100	gmmR0MutexRelease(pGMM);
2101
2102	RTR0MEMOBJ hMemObj;
2103	int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
2104	if (RT_SUCCESS(rc))
2105	{
2106	/** @todo Duplicate gmmR0RegisterChunk here so we can avoid chaining up the
2107	* free pages first and then unchaining them right afterwards. Instead
2108	* do as much work as possible without holding the giant lock. */
2109	PGMMCHUNK pChunk;
2110	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, 0 /fChunkFlags/, &pChunk);
2111	if (RT_SUCCESS(rc))
2112	{
2113	piPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, piPage, cPages, paPages);
2114	return VINF_SUCCESS;
2115	}
2116
2117	/* bail out */
2118	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2119	}
2120
2121	int rc2 = gmmR0MutexAcquire(pGMM);
2122	AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
2123	return rc;
2124
2125	}
2126
2127
2128	/**
2129	* As a last restort we'll pick any page we can get.
2130	*
2131	* @returns The new page descriptor table index.
2132	* @param pGMM Pointer to the GMM instance data.
2133	* @param pGVM Pointer to the global VM structure.
2134	* @param pSet The set to pick from.
2135	* @param iPage The current page descriptor table index.
2136	* @param cPages The total number of pages to allocate.
2137	* @param paPages The page descriptor table (input + ouput).
2138	*/
2139	static uint32_t gmmR0AllocatePagesIndiscriminately(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2140	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2141	{
2142	unsigned iList = RT_ELEMENTS(pSet->apLists);
2143	while (iList-- > 0)
2144	{
2145	PGMMCHUNK pChunk = pSet->apLists[iList];
2146	while (pChunk)
2147	{
2148	PGMMCHUNK pNext = pChunk->pFreeNext;
2149
2150	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2151	if (iPage >= cPages)
2152	return iPage;
2153
2154	pChunk = pNext;
2155	}
2156	}
2157	return iPage;
2158	}
2159
2160
2161	/**
2162	* Pick pages from empty chunks on the same NUMA node.
2163	*
2164	* @returns The new page descriptor table index.
2165	* @param pGMM Pointer to the GMM instance data.
2166	* @param pGVM Pointer to the global VM structure.
2167	* @param pSet The set to pick from.
2168	* @param iPage The current page descriptor table index.
2169	* @param cPages The total number of pages to allocate.
2170	* @param paPages The page descriptor table (input + ouput).
2171	*/
2172	static uint32_t gmmR0AllocatePagesFromEmptyChunksOnSameNode(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2173	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2174	{
2175	PGMMCHUNK pChunk = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
2176	if (pChunk)
2177	{
2178	uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2179	while (pChunk)
2180	{
2181	PGMMCHUNK pNext = pChunk->pFreeNext;
2182
2183	if (pChunk->idNumaNode == idNumaNode)
2184	{
2185	pChunk->hGVM = pGVM->hSelf;
2186	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2187	if (iPage >= cPages)
2188	{
2189	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2190	return iPage;
2191	}
2192	}
2193
2194	pChunk = pNext;
2195	}
2196	}
2197	return iPage;
2198	}
2199
2200
2201	/**
2202	* Pick pages from non-empty chunks on the same NUMA node.
2203	*
2204	* @returns The new page descriptor table index.
2205	* @param pGMM Pointer to the GMM instance data.
2206	* @param pGVM Pointer to the global VM structure.
2207	* @param pSet The set to pick from.
2208	* @param iPage The current page descriptor table index.
2209	* @param cPages The total number of pages to allocate.
2210	* @param paPages The page descriptor table (input + ouput).
2211	*/
2212	static uint32_t gmmR0AllocatePagesFromSameNode(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2213	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2214	{
2215	/** @todo start by picking from chunks with about the right size first? */
2216	uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2217	unsigned iList = GMM_CHUNK_FREE_SET_UNUSED_LIST;
2218	while (iList-- > 0)
2219	{
2220	PGMMCHUNK pChunk = pSet->apLists[iList];
2221	while (pChunk)
2222	{
2223	PGMMCHUNK pNext = pChunk->pFreeNext;
2224
2225	if (pChunk->idNumaNode == idNumaNode)
2226	{
2227	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2228	if (iPage >= cPages)
2229	{
2230	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2231	return iPage;
2232	}
2233	}
2234
2235	pChunk = pNext;
2236	}
2237	}
2238	return iPage;
2239	}
2240
2241
2242	/**
2243	* Pick pages that are in chunks already associated with the VM.
2244	*
2245	* @returns The new page descriptor table index.
2246	* @param pGMM Pointer to the GMM instance data.
2247	* @param pGVM Pointer to the global VM structure.
2248	* @param pSet The set to pick from.
2249	* @param iPage The current page descriptor table index.
2250	* @param cPages The total number of pages to allocate.
2251	* @param paPages The page descriptor table (input + ouput).
2252	*/
2253	static uint32_t gmmR0AllocatePagesAssociatedWithVM(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2254	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2255	{
2256	uint16_t const hGVM = pGVM->hSelf;
2257
2258	/* Hint. */
2259	if (pGVM->gmm.s.idLastChunkHint != NIL_GMM_CHUNKID)
2260	{
2261	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pGVM->gmm.s.idLastChunkHint);
2262	if (pChunk && pChunk->cFree)
2263	{
2264	iPage = gmmR0AllocatePagesFromChunk(pGMM, hGVM, pChunk, iPage, cPages, paPages);
2265	if (iPage >= cPages)
2266	return iPage;
2267	}
2268	}
2269
2270	/* Scan. */
2271	for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
2272	{
2273	PGMMCHUNK pChunk = pSet->apLists[iList];
2274	while (pChunk)
2275	{
2276	PGMMCHUNK pNext = pChunk->pFreeNext;
2277
2278	if (pChunk->hGVM == hGVM)
2279	{
2280	iPage = gmmR0AllocatePagesFromChunk(pGMM, hGVM, pChunk, iPage, cPages, paPages);
2281	if (iPage >= cPages)
2282	{
2283	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2284	return iPage;
2285	}
2286	}
2287
2288	pChunk = pNext;
2289	}
2290	}
2291	return iPage;
2292	}
2293
2294
2295
2296	/**
2297	* Pick pages in bound memory mode.
2298	*
2299	* @returns The new page descriptor table index.
2300	* @param pGMM Pointer to the GMM instance data.
2301	* @param pGVM Pointer to the global VM structure.
2302	* @param iPage The current page descriptor table index.
2303	* @param cPages The total number of pages to allocate.
2304	* @param paPages The page descriptor table (input + ouput).
2305	*/
2306	static uint32_t gmmR0AllocatePagesInBoundMode(PGMM pGMM, PGVM pGVM, uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2307	{
2308	for (unsigned iList = 0; iList < RT_ELEMENTS(pGVM->gmm.s.Private.apLists); iList++)
2309	{
2310	PGMMCHUNK pChunk = pGVM->gmm.s.Private.apLists[iList];
2311	while (pChunk)
2312	{
2313	Assert(pChunk->hGVM == pGVM->hSelf);
2314	PGMMCHUNK pNext = pChunk->pFreeNext;
2315	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2316	if (iPage >= cPages)
2317	return iPage;
2318	pChunk = pNext;
2319	}
2320	}
2321	return iPage;
2322	}
2323
2324
2325	/**
2326	* Checks if we should start picking pages from chunks of other VMs.
2327	*
2328	* @returns @c true if we should, @c false if we should first try allocate more
2329	* chunks.
2330	*/
2331	static bool gmmR0ShouldAllocatePagesInOtherChunks(PGVM pGVM)
2332	{
2333	/*
2334	* Don't allocate a new chunk if we're
2335	*/
2336	uint64_t cPgReserved = pGVM->gmm.s.Reserved.cBasePages
2337	+ pGVM->gmm.s.Reserved.cFixedPages
2338	- pGVM->gmm.s.cBalloonedPages
2339	/** @todo what about shared pages? */;
2340	uint64_t cPgAllocated = pGVM->gmm.s.Allocated.cBasePages
2341	+ pGVM->gmm.s.Allocated.cFixedPages;
2342	uint64_t cPgDelta = cPgReserved - cPgAllocated;
2343	if (cPgDelta < GMM_CHUNK_NUM_PAGES * 4)
2344	return true;
2345	/** @todo make the threshold configurable, also test the code to see if
2346	* this ever kicks in (we might be reserving too much or smth). */
2347
2348	/*
2349	* Check how close we're to the max memory limit and how many fragments
2350	* there are?...
2351	*/
2352	/** @todo. */
2353
2354	return false;
2355	}
2356
2357
2358	/**
2359	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
2360	*
2361	* @returns VBox status code:
2362	* @retval VINF_SUCCESS on success.
2363	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
2364	* gmmR0AllocateMoreChunks is necessary.
2365	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2366	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2367	* that is we're trying to allocate more than we've reserved.
2368	*
2369	* @param pGMM Pointer to the GMM instance data.
2370	* @param pGVM Pointer to the shared VM structure.
2371	* @param cPages The number of pages to allocate.
2372	* @param paPages Pointer to the page descriptors.
2373	* See GMMPAGEDESC for details on what is expected on input.
2374	* @param enmAccount The account to charge.
2375	*
2376	* @remarks Call takes the giant GMM lock.
2377	*/
2378	static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2379	{
2380	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
2381
2382	/*
2383	* Check allocation limits.
2384	*/
2385	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
2386	return VERR_GMM_HIT_GLOBAL_LIMIT;
2387
2388	switch (enmAccount)
2389	{
2390	case GMMACCOUNT_BASE:
2391	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2392	> pGVM->gmm.s.Reserved.cBasePages))
2393	{
2394	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
2395	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
2396	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2397	}
2398	break;
2399	case GMMACCOUNT_SHADOW:
2400	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
2401	{
2402	Log(("gmmR0AllocatePages:Shadow: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2403	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
2404	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2405	}
2406	break;
2407	case GMMACCOUNT_FIXED:
2408	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
2409	{
2410	Log(("gmmR0AllocatePages:Fixed: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2411	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
2412	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2413	}
2414	break;
2415	default:
2416	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2417	}
2418
2419	/*
2420	* If we're in legacy memory mode, it's easy to figure if we have
2421	* sufficient number of pages up-front.
2422	*/
2423	if ( pGMM->fLegacyAllocationMode
2424	&& pGVM->gmm.s.Private.cFreePages < cPages)
2425	{
2426	Assert(pGMM->fBoundMemoryMode);
2427	return VERR_GMM_SEED_ME;
2428	}
2429
2430	/*
2431	* Update the accounts before we proceed because we might be leaving the
2432	* protection of the global mutex and thus run the risk of permitting
2433	* too much memory to be allocated.
2434	*/
2435	switch (enmAccount)
2436	{
2437	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += cPages; break;
2438	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += cPages; break;
2439	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += cPages; break;
2440	default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2441	}
2442	pGVM->gmm.s.cPrivatePages += cPages;
2443	pGMM->cAllocatedPages += cPages;
2444
2445	/*
2446	* Part two of it's-easy-in-legacy-memory-mode.
2447	*/
2448	uint32_t iPage = 0;
2449	if (pGMM->fLegacyAllocationMode)
2450	{
2451	iPage = gmmR0AllocatePagesInBoundMode(pGMM, pGVM, iPage, cPages, paPages);
2452	AssertReleaseReturn(iPage == cPages, VERR_INTERNAL_ERROR_3);
2453	return VINF_SUCCESS;
2454	}
2455
2456	/*
2457	* Bound mode is also relatively straightforward.
2458	*/
2459	int rc = VINF_SUCCESS;
2460	if (pGMM->fBoundMemoryMode)
2461	{
2462	iPage = gmmR0AllocatePagesInBoundMode(pGMM, pGVM, iPage, cPages, paPages);
2463	if (iPage < cPages)
2464	do
2465	rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGVM->gmm.s.Private, cPages, paPages, &iPage);
2466	while (iPage < cPages && RT_SUCCESS(rc));
2467	}
2468	/*
2469	* Shared mode is trickier as we should try archive the same locality as
2470	* in bound mode, but smartly make use of non-full chunks allocated by
2471	* other VMs if we're low on memory.
2472	*/
2473	else
2474	{
2475	/* Pick the most optimal pages first. */
2476	iPage = gmmR0AllocatePagesAssociatedWithVM(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2477	if (iPage < cPages)
2478	{
2479	/* Maybe we should try getting pages from chunks "belonging" to
2480	other VMs before allocating more chunks? */
2481	if (gmmR0ShouldAllocatePagesInOtherChunks(pGVM))
2482	iPage = gmmR0AllocatePagesFromSameNode(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2483
2484	/* Allocate memory from empty chunks. */
2485	if (iPage < cPages)
2486	iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2487
2488	/* Grab empty shared chunks. */
2489	if (iPage < cPages)
2490	iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(pGMM, pGVM, &pGMM->Shared, iPage, cPages, paPages);
2491
2492	/*
2493	* Ok, try allocate new chunks.
2494	*/
2495	if (iPage < cPages)
2496	{
2497	do
2498	rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGMM->PrivateX, cPages, paPages, &iPage);
2499	while (iPage < cPages && RT_SUCCESS(rc));
2500
2501	/* If the host is out of memory, take whatever we can get. */
2502	if ( rc == VERR_NO_MEMORY
2503	&& pGMM->PrivateX.cFreePages + pGMM->Shared.cFreePages >= cPages - iPage)
2504	{
2505	iPage = gmmR0AllocatePagesIndiscriminately(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2506	if (iPage < cPages)
2507	iPage = gmmR0AllocatePagesIndiscriminately(pGMM, pGVM, &pGMM->Shared, iPage, cPages, paPages);
2508	AssertRelease(iPage == cPages);
2509	rc = VINF_SUCCESS;
2510	}
2511	}
2512	}
2513	}
2514
2515	/*
2516	* Clean up on failure. Since this is bound to be a low-memory condition
2517	* we will give back any empty chunks that might be hanging around.
2518	*/
2519	if (RT_FAILURE(rc))
2520	{
2521	/* Update the statistics. */
2522	pGVM->gmm.s.cPrivatePages -= cPages;
2523	pGMM->cAllocatedPages -= cPages - iPage;
2524	switch (enmAccount)
2525	{
2526	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= cPages; break;
2527	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= cPages; break;
2528	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= cPages; break;
2529	default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2530	}
2531
2532	/* Release the pages. */
2533	while (iPage-- > 0)
2534	{
2535	uint32_t idPage = paPages[iPage].idPage;
2536	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2537	if (RT_LIKELY(pPage))
2538	{
2539	Assert(GMM_PAGE_IS_PRIVATE(pPage));
2540	Assert(pPage->Private.hGVM == pGVM->hSelf);
2541	gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
2542	}
2543	else
2544	AssertMsgFailed(("idPage=%#x\n", idPage));
2545	}
2546
2547	/* Free empty chunks. */
2548	/** @todo */
2549	}
2550	return VINF_SUCCESS;
2551	}
2552
2553
2554	/**
2555	* Updates the previous allocations and allocates more pages.
2556	*
2557	* The handy pages are always taken from the 'base' memory account.
2558	* The allocated pages are not cleared and will contains random garbage.
2559	*
2560	* @returns VBox status code:
2561	* @retval VINF_SUCCESS on success.
2562	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2563	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2564	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2565	* private page.
2566	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2567	* shared page.
2568	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2569	* owned by the VM.
2570	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2571	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2572	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2573	* that is we're trying to allocate more than we've reserved.
2574	*
2575	* @param pVM Pointer to the shared VM structure.
2576	* @param idCpu VCPU id
2577	* @param cPagesToUpdate The number of pages to update (starting from the head).
2578	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2579	* @param paPages The array of page descriptors.
2580	* See GMMPAGEDESC for details on what is expected on input.
2581	* @thread EMT.
2582	*/
2583	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2584	{
2585	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2586	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2587
2588	/*
2589	* Validate, get basics and take the semaphore.
2590	* (This is a relatively busy path, so make predictions where possible.)
2591	*/
2592	PGMM pGMM;
2593	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2594	PGVM pGVM;
2595	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2596	if (RT_FAILURE(rc))
2597	return rc;
2598
2599	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2600	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2601	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2602	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2603	VERR_INVALID_PARAMETER);
2604
2605	unsigned iPage = 0;
2606	for (; iPage < cPagesToUpdate; iPage++)
2607	{
2608	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2609	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2610	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2611	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2612	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2613	VERR_INVALID_PARAMETER);
2614	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2615	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2616	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2617	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2618	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2619	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2620	}
2621
2622	for (; iPage < cPagesToAlloc; iPage++)
2623	{
2624	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2625	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2626	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2627	}
2628
2629	gmmR0MutexAcquire(pGMM);
2630	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2631	{
2632	/* No allocations before the initial reservation has been made! */
2633	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2634	&& pGVM->gmm.s.Reserved.cFixedPages
2635	&& pGVM->gmm.s.Reserved.cShadowPages))
2636	{
2637	/*
2638	* Perform the updates.
2639	* Stop on the first error.
2640	*/
2641	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2642	{
2643	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2644	{
2645	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2646	if (RT_LIKELY(pPage))
2647	{
2648	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2649	{
2650	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2651	{
2652	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2653	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2654	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2655	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2656	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2657	/* else: NIL_RTHCPHYS nothing */
2658
2659	paPages[iPage].idPage = NIL_GMM_PAGEID;
2660	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2661	}
2662	else
2663	{
2664	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2665	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2666	rc = VERR_GMM_NOT_PAGE_OWNER;
2667	break;
2668	}
2669	}
2670	else
2671	{
2672	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage, pPage->Common.u2State));
2673	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2674	break;
2675	}
2676	}
2677	else
2678	{
2679	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2680	rc = VERR_GMM_PAGE_NOT_FOUND;
2681	break;
2682	}
2683	}
2684
2685	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2686	{
2687	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2688	if (RT_LIKELY(pPage))
2689	{
2690	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2691	{
2692	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2693	Assert(pPage->Shared.cRefs);
2694	Assert(pGVM->gmm.s.cSharedPages);
2695	Assert(pGVM->gmm.s.Allocated.cBasePages);
2696
2697	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2698	pGVM->gmm.s.cSharedPages--;
2699	pGVM->gmm.s.Allocated.cBasePages--;
2700	if (!--pPage->Shared.cRefs)
2701	gmmR0FreeSharedPage(pGMM, pGVM, paPages[iPage].idSharedPage, pPage);
2702	else
2703	{
2704	Assert(pGMM->cDuplicatePages);
2705	pGMM->cDuplicatePages--;
2706	}
2707
2708	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2709	}
2710	else
2711	{
2712	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2713	rc = VERR_GMM_PAGE_NOT_SHARED;
2714	break;
2715	}
2716	}
2717	else
2718	{
2719	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2720	rc = VERR_GMM_PAGE_NOT_FOUND;
2721	break;
2722	}
2723	}
2724	} /* for each page to update */
2725
2726	if (RT_SUCCESS(rc))
2727	{
2728	#if 0 /* This appears to spell trouble... weird. */
2729	for (iPage = 0; iPage < cPagesToAlloc; iPage++)
2730	{
2731	Assert(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS);
2732	Assert(paPages[iPage].idPage == NIL_GMM_PAGEID);
2733	Assert(paPages[iPage].idSharedPage == NIL_GMM_PAGEID);
2734	}
2735	#endif
2736
2737	/*
2738	* Join paths with GMMR0AllocatePages for the allocation.
2739	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2740	*/
2741	#if 0 /* Trying to reproduce out of memory issue... */
2742	if (!cPagesToUpdate)
2743	#endif
2744	rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2745	#if 0 /* Trying to reproduce out of memory issue... */
2746	else
2747	{
2748	for (iPage = 0; iPage < cPagesToAlloc; iPage++)
2749	{
2750	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2751	paPages[iPage].idPage = NIL_GMM_PAGEID;
2752	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2753	}
2754
2755	rc = VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2756	}
2757	#endif
2758	}
2759	}
2760	else
2761	rc = VERR_WRONG_ORDER;
2762	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2763	}
2764	else
2765	rc = VERR_INTERNAL_ERROR_5;
2766	gmmR0MutexRelease(pGMM);
2767	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2768	return rc;
2769	}
2770
2771
2772	/**
2773	* Allocate one or more pages.
2774	*
2775	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2776	* The allocated pages are not cleared and will contains random garbage.
2777	*
2778	* @returns VBox status code:
2779	* @retval VINF_SUCCESS on success.
2780	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2781	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2782	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2783	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2784	* that is we're trying to allocate more than we've reserved.
2785	*
2786	* @param pVM Pointer to the shared VM structure.
2787	* @param idCpu VCPU id
2788	* @param cPages The number of pages to allocate.
2789	* @param paPages Pointer to the page descriptors.
2790	* See GMMPAGEDESC for details on what is expected on input.
2791	* @param enmAccount The account to charge.
2792	*
2793	* @thread EMT.
2794	*/
2795	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2796	{
2797	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2798
2799	/*
2800	* Validate, get basics and take the semaphore.
2801	*/
2802	PGMM pGMM;
2803	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2804	PGVM pGVM;
2805	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2806	if (RT_FAILURE(rc))
2807	return rc;
2808
2809	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2810	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2811	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2812
2813	for (unsigned iPage = 0; iPage < cPages; iPage++)
2814	{
2815	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2816	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2817	\|\| ( enmAccount == GMMACCOUNT_BASE
2818	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2819	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2820	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2821	VERR_INVALID_PARAMETER);
2822	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2823	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2824	}
2825
2826	gmmR0MutexAcquire(pGMM);
2827	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2828	{
2829
2830	/* No allocations before the initial reservation has been made! */
2831	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2832	&& pGVM->gmm.s.Reserved.cFixedPages
2833	&& pGVM->gmm.s.Reserved.cShadowPages))
2834	rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPages, paPages, enmAccount);
2835	else
2836	rc = VERR_WRONG_ORDER;
2837	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2838	}
2839	else
2840	rc = VERR_INTERNAL_ERROR_5;
2841	gmmR0MutexRelease(pGMM);
2842	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2843	return rc;
2844	}
2845
2846
2847	/**
2848	* VMMR0 request wrapper for GMMR0AllocatePages.
2849	*
2850	* @returns see GMMR0AllocatePages.
2851	* @param pVM Pointer to the shared VM structure.
2852	* @param idCpu VCPU id
2853	* @param pReq The request packet.
2854	*/
2855	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2856	{
2857	/*
2858	* Validate input and pass it on.
2859	*/
2860	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2861	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2862	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2863	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2864	VERR_INVALID_PARAMETER);
2865	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2866	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2867	VERR_INVALID_PARAMETER);
2868
2869	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2870	}
2871
2872
2873	/**
2874	* Allocate a large page to represent guest RAM
2875	*
2876	* The allocated pages are not cleared and will contains random garbage.
2877	*
2878	* @returns VBox status code:
2879	* @retval VINF_SUCCESS on success.
2880	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2881	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2882	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2883	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2884	* that is we're trying to allocate more than we've reserved.
2885	* @returns see GMMR0AllocatePages.
2886	* @param pVM Pointer to the shared VM structure.
2887	* @param idCpu VCPU id
2888	* @param cbPage Large page size
2889	*/
2890	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2891	{
2892	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2893
2894	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2895	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2896	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2897
2898	/*
2899	* Validate, get basics and take the semaphore.
2900	*/
2901	PGMM pGMM;
2902	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2903	PGVM pGVM;
2904	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2905	if (RT_FAILURE(rc))
2906	return rc;
2907
2908	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2909	if (pGMM->fLegacyAllocationMode)
2910	return VERR_NOT_SUPPORTED;
2911
2912	*pHCPhys = NIL_RTHCPHYS;
2913	*pIdPage = NIL_GMM_PAGEID;
2914
2915	gmmR0MutexAcquire(pGMM);
2916	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2917	{
2918	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2919	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2920	> pGVM->gmm.s.Reserved.cBasePages))
2921	{
2922	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2923	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2924	gmmR0MutexRelease(pGMM);
2925	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2926	}
2927
2928	/*
2929	* Allocate a new large page chunk.
2930	*
2931	* Note! We leave the giant GMM lock temporarily as the allocation might
2932	* take a long time. gmmR0RegisterChunk will retake it (ugly).
2933	*/
2934	AssertCompile(GMM_CHUNK_SIZE == _2M);
2935	gmmR0MutexRelease(pGMM);
2936
2937	RTR0MEMOBJ hMemObj;
2938	rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
2939	if (RT_SUCCESS(rc))
2940	{
2941	PGMMCHUNKFREESET pSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
2942	PGMMCHUNK pChunk;
2943	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
2944	if (RT_SUCCESS(rc))
2945	{
2946	/*
2947	* Allocate all the pages in the chunk.
2948	*/
2949	/* Unlink the new chunk from the free list. */
2950	gmmR0UnlinkChunk(pChunk);
2951
2952	/** @todo rewrite this to skip the looping. */
2953	/* Allocate all pages. */
2954	GMMPAGEDESC PageDesc;
2955	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2956
2957	/* Return the first page as we'll use the whole chunk as one big page. */
2958	*pIdPage = PageDesc.idPage;
2959	*pHCPhys = PageDesc.HCPhysGCPhys;
2960
2961	for (unsigned i = 1; i < cPages; i++)
2962	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2963
2964	/* Update accounting. */
2965	pGVM->gmm.s.Allocated.cBasePages += cPages;
2966	pGVM->gmm.s.cPrivatePages += cPages;
2967	pGMM->cAllocatedPages += cPages;
2968
2969	gmmR0LinkChunk(pChunk, pSet);
2970	gmmR0MutexRelease(pGMM);
2971	}
2972	else
2973	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2974	}
2975	}
2976	else
2977	{
2978	gmmR0MutexRelease(pGMM);
2979	rc = VERR_INTERNAL_ERROR_5;
2980	}
2981
2982	LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2983	return rc;
2984	}
2985
2986
2987	/**
2988	* Free a large page
2989	*
2990	* @returns VBox status code:
2991	* @param pVM Pointer to the shared VM structure.
2992	* @param idCpu VCPU id
2993	* @param idPage Large page id
2994	*/
2995	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2996	{
2997	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2998
2999	/*
3000	* Validate, get basics and take the semaphore.
3001	*/
3002	PGMM pGMM;
3003	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3004	PGVM pGVM;
3005	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3006	if (RT_FAILURE(rc))
3007	return rc;
3008
3009	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
3010	if (pGMM->fLegacyAllocationMode)
3011	return VERR_NOT_SUPPORTED;
3012
3013	gmmR0MutexAcquire(pGMM);
3014	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3015	{
3016	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
3017
3018	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
3019	{
3020	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
3021	gmmR0MutexRelease(pGMM);
3022	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3023	}
3024
3025	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3026	if (RT_LIKELY( pPage
3027	&& GMM_PAGE_IS_PRIVATE(pPage)))
3028	{
3029	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3030	Assert(pChunk);
3031	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3032	Assert(pChunk->cPrivate > 0);
3033
3034	/* Release the memory immediately. */
3035	gmmR0FreeChunk(pGMM, NULL, pChunk, false /fRelaxedSem/); /** @todo this can be relaxed too! */
3036
3037	/* Update accounting. */
3038	pGVM->gmm.s.Allocated.cBasePages -= cPages;
3039	pGVM->gmm.s.cPrivatePages -= cPages;
3040	pGMM->cAllocatedPages -= cPages;
3041	}
3042	else
3043	rc = VERR_GMM_PAGE_NOT_FOUND;
3044	}
3045	else
3046	rc = VERR_INTERNAL_ERROR_5;
3047
3048	gmmR0MutexRelease(pGMM);
3049	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
3050	return rc;
3051	}
3052
3053
3054	/**
3055	* VMMR0 request wrapper for GMMR0FreeLargePage.
3056	*
3057	* @returns see GMMR0FreeLargePage.
3058	* @param pVM Pointer to the shared VM structure.
3059	* @param idCpu VCPU id
3060	* @param pReq The request packet.
3061	*/
3062	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
3063	{
3064	/*
3065	* Validate input and pass it on.
3066	*/
3067	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3068	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3069	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
3070	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
3071	VERR_INVALID_PARAMETER);
3072
3073	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
3074	}
3075
3076
3077	/**
3078	* Frees a chunk, giving it back to the host OS.
3079	*
3080	* @param pGMM Pointer to the GMM instance.
3081	* @param pGVM This is set when called from GMMR0CleanupVM so we can
3082	* unmap and free the chunk in one go.
3083	* @param pChunk The chunk to free.
3084	* @param fRelaxedSem Whether we can release the semaphore while doing the
3085	* freeing (@c true) or not.
3086	*/
3087	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3088	{
3089	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
3090
3091	GMMR0CHUNKMTXSTATE MtxState;
3092	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3093
3094	/*
3095	* Cleanup hack! Unmap the chunk from the callers address space.
3096	* This shouldn't happen, so screw lock contention...
3097	*/
3098	if ( pChunk->cMappingsX
3099	&& !pGMM->fLegacyAllocationMode
3100	&& pGVM)
3101	gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3102
3103	/*
3104	* If there are current mappings of the chunk, then request the
3105	* VMs to unmap them. Reposition the chunk in the free list so
3106	* it won't be a likely candidate for allocations.
3107	*/
3108	if (pChunk->cMappingsX)
3109	{
3110	/** @todo R0 -> VM request */
3111	/* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
3112	Log(("gmmR0FreeChunk: chunk still has %d/%d mappings; don't free!\n", pChunk->cMappingsX));
3113	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3114	return false;
3115	}
3116
3117
3118	/*
3119	* Save and trash the handle.
3120	*/
3121	RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
3122	pChunk->hMemObj = NIL_RTR0MEMOBJ;
3123
3124	/*
3125	* Unlink it from everywhere.
3126	*/
3127	gmmR0UnlinkChunk(pChunk);
3128
3129	RTListNodeRemove(&pChunk->ListNode);
3130
3131	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
3132	Assert(pCore == &pChunk->Core); NOREF(pCore);
3133
3134	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
3135	if (pTlbe->pChunk == pChunk)
3136	{
3137	pTlbe->idChunk = NIL_GMM_CHUNKID;
3138	pTlbe->pChunk = NULL;
3139	}
3140
3141	Assert(pGMM->cChunks > 0);
3142	pGMM->cChunks--;
3143
3144	/*
3145	* Free the Chunk ID before dropping the locks and freeing the rest.
3146	*/
3147	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
3148	pChunk->Core.Key = NIL_GMM_CHUNKID;
3149
3150	pGMM->cFreedChunks++;
3151
3152	gmmR0ChunkMutexRelease(&MtxState, NULL);
3153	if (fRelaxedSem)
3154	gmmR0MutexRelease(pGMM);
3155
3156	RTMemFree(pChunk->paMappingsX);
3157	pChunk->paMappingsX = NULL;
3158
3159	RTMemFree(pChunk);
3160
3161	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
3162	AssertLogRelRC(rc);
3163
3164	if (fRelaxedSem)
3165	gmmR0MutexAcquire(pGMM);
3166	return fRelaxedSem;
3167	}
3168
3169
3170	/**
3171	* Free page worker.
3172	*
3173	* The caller does all the statistic decrementing, we do all the incrementing.
3174	*
3175	* @param pGMM Pointer to the GMM instance data.
3176	* @param pGVM Pointer to the GVM instance.
3177	* @param pChunk Pointer to the chunk this page belongs to.
3178	* @param idPage The Page ID.
3179	* @param pPage Pointer to the page.
3180	*/
3181	static void gmmR0FreePageWorker(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
3182	{
3183	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
3184	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
3185
3186	/*
3187	* Put the page on the free list.
3188	*/
3189	pPage->u = 0;
3190	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
3191	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
3192	pPage->Free.iNext = pChunk->iFreeHead;
3193	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
3194
3195	/*
3196	* Update statistics (the cShared/cPrivate stats are up to date already),
3197	* and relink the chunk if necessary.
3198	*/
3199	unsigned const cFree = pChunk->cFree;
3200	if ( !cFree
3201	\|\| gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
3202	{
3203	gmmR0UnlinkChunk(pChunk);
3204	pChunk->cFree++;
3205	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
3206	}
3207	else
3208	{
3209	pChunk->cFree = cFree + 1;
3210	pChunk->pSet->cFreePages++;
3211	}
3212
3213	/*
3214	* If the chunk becomes empty, consider giving memory back to the host OS.
3215	*
3216	* The current strategy is to try give it back if there are other chunks
3217	* in this free list, meaning if there are at least 240 free pages in this
3218	* category. Note that since there are probably mappings of the chunk,
3219	* it won't be freed up instantly, which probably screws up this logic
3220	* a bit...
3221	*/
3222	/** @todo Do this on the way out. */
3223	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
3224	&& pChunk->pFreeNext
3225	&& pChunk->pFreePrev /** @todo this is probably misfiring, see reset... */
3226	&& !pGMM->fLegacyAllocationMode))
3227	gmmR0FreeChunk(pGMM, NULL, pChunk, false);
3228
3229	}
3230
3231
3232	/**
3233	* Frees a shared page, the page is known to exist and be valid and such.
3234	*
3235	* @param pGMM Pointer to the GMM instance.
3236	* @param pGVM Pointer to the GVM instance.
3237	* @param idPage The Page ID
3238	* @param pPage The page structure.
3239	*/
3240	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3241	{
3242	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3243	Assert(pChunk);
3244	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3245	Assert(pChunk->cShared > 0);
3246	Assert(pGMM->cSharedPages > 0);
3247	Assert(pGMM->cAllocatedPages > 0);
3248	Assert(!pPage->Shared.cRefs);
3249
3250	pChunk->cShared--;
3251	pGMM->cAllocatedPages--;
3252	pGMM->cSharedPages--;
3253	gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3254	}
3255
3256
3257	/**
3258	* Frees a private page, the page is known to exist and be valid and such.
3259	*
3260	* @param pGMM Pointer to the GMM instance.
3261	* @param pGVM Pointer to the GVM instance.
3262	* @param idPage The Page ID
3263	* @param pPage The page structure.
3264	*/
3265	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3266	{
3267	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3268	Assert(pChunk);
3269	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3270	Assert(pChunk->cPrivate > 0);
3271	Assert(pGMM->cAllocatedPages > 0);
3272
3273	pChunk->cPrivate--;
3274	pGMM->cAllocatedPages--;
3275	gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3276	}
3277
3278
3279	/**
3280	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
3281	*
3282	* @returns VBox status code:
3283	* @retval xxx
3284	*
3285	* @param pGMM Pointer to the GMM instance data.
3286	* @param pGVM Pointer to the shared VM structure.
3287	* @param cPages The number of pages to free.
3288	* @param paPages Pointer to the page descriptors.
3289	* @param enmAccount The account this relates to.
3290	*/
3291	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3292	{
3293	/*
3294	* Check that the request isn't impossible wrt to the account status.
3295	*/
3296	switch (enmAccount)
3297	{
3298	case GMMACCOUNT_BASE:
3299	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
3300	{
3301	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
3302	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3303	}
3304	break;
3305	case GMMACCOUNT_SHADOW:
3306	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
3307	{
3308	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
3309	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3310	}
3311	break;
3312	case GMMACCOUNT_FIXED:
3313	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
3314	{
3315	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
3316	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3317	}
3318	break;
3319	default:
3320	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3321	}
3322
3323	/*
3324	* Walk the descriptors and free the pages.
3325	*
3326	* Statistics (except the account) are being updated as we go along,
3327	* unlike the alloc code. Also, stop on the first error.
3328	*/
3329	int rc = VINF_SUCCESS;
3330	uint32_t iPage;
3331	for (iPage = 0; iPage < cPages; iPage++)
3332	{
3333	uint32_t idPage = paPages[iPage].idPage;
3334	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3335	if (RT_LIKELY(pPage))
3336	{
3337	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3338	{
3339	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3340	{
3341	Assert(pGVM->gmm.s.cPrivatePages);
3342	pGVM->gmm.s.cPrivatePages--;
3343	gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
3344	}
3345	else
3346	{
3347	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
3348	pPage->Private.hGVM, pGVM->hSelf));
3349	rc = VERR_GMM_NOT_PAGE_OWNER;
3350	break;
3351	}
3352	}
3353	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3354	{
3355	Assert(pGVM->gmm.s.cSharedPages);
3356	pGVM->gmm.s.cSharedPages--;
3357	Assert(pPage->Shared.cRefs);
3358	if (!--pPage->Shared.cRefs)
3359	gmmR0FreeSharedPage(pGMM, pGVM, idPage, pPage);
3360	else
3361	{
3362	Assert(pGMM->cDuplicatePages);
3363	pGMM->cDuplicatePages--;
3364	}
3365	}
3366	else
3367	{
3368	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
3369	rc = VERR_GMM_PAGE_ALREADY_FREE;
3370	break;
3371	}
3372	}
3373	else
3374	{
3375	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
3376	rc = VERR_GMM_PAGE_NOT_FOUND;
3377	break;
3378	}
3379	paPages[iPage].idPage = NIL_GMM_PAGEID;
3380	}
3381
3382	/*
3383	* Update the account.
3384	*/
3385	switch (enmAccount)
3386	{
3387	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
3388	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
3389	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
3390	default:
3391	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3392	}
3393
3394	/*
3395	* Any threshold stuff to be done here?
3396	*/
3397
3398	return rc;
3399	}
3400
3401
3402	/**
3403	* Free one or more pages.
3404	*
3405	* This is typically used at reset time or power off.
3406	*
3407	* @returns VBox status code:
3408	* @retval xxx
3409	*
3410	* @param pVM Pointer to the shared VM structure.
3411	* @param idCpu VCPU id
3412	* @param cPages The number of pages to allocate.
3413	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
3414	* @param enmAccount The account this relates to.
3415	* @thread EMT.
3416	*/
3417	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3418	{
3419	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
3420
3421	/*
3422	* Validate input and get the basics.
3423	*/
3424	PGMM pGMM;
3425	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3426	PGVM pGVM;
3427	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3428	if (RT_FAILURE(rc))
3429	return rc;
3430
3431	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3432	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3433	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3434
3435	for (unsigned iPage = 0; iPage < cPages; iPage++)
3436	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
3437	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
3438	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3439
3440	/*
3441	* Take the semaphore and call the worker function.
3442	*/
3443	gmmR0MutexAcquire(pGMM);
3444	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3445	{
3446	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
3447	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3448	}
3449	else
3450	rc = VERR_INTERNAL_ERROR_5;
3451	gmmR0MutexRelease(pGMM);
3452	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
3453	return rc;
3454	}
3455
3456
3457	/**
3458	* VMMR0 request wrapper for GMMR0FreePages.
3459	*
3460	* @returns see GMMR0FreePages.
3461	* @param pVM Pointer to the shared VM structure.
3462	* @param idCpu VCPU id
3463	* @param pReq The request packet.
3464	*/
3465	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
3466	{
3467	/*
3468	* Validate input and pass it on.
3469	*/
3470	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3471	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3472	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
3473	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
3474	VERR_INVALID_PARAMETER);
3475	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
3476	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
3477	VERR_INVALID_PARAMETER);
3478
3479	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3480	}
3481
3482
3483	/**
3484	* Report back on a memory ballooning request.
3485	*
3486	* The request may or may not have been initiated by the GMM. If it was initiated
3487	* by the GMM it is important that this function is called even if no pages were
3488	* ballooned.
3489	*
3490	* @returns VBox status code:
3491	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
3492	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
3493	* @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
3494	* indicating that we won't necessarily have sufficient RAM to boot
3495	* the VM again and that it should pause until this changes (we'll try
3496	* balloon some other VM). (For standard deflate we have little choice
3497	* but to hope the VM won't use the memory that was returned to it.)
3498	*
3499	* @param pVM Pointer to the shared VM structure.
3500	* @param idCpu VCPU id
3501	* @param enmAction Inflate/deflate/reset
3502	* @param cBalloonedPages The number of pages that was ballooned.
3503	*
3504	* @thread EMT.
3505	*/
3506	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
3507	{
3508	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
3509	pVM, enmAction, cBalloonedPages));
3510
3511	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
3512
3513	/*
3514	* Validate input and get the basics.
3515	*/
3516	PGMM pGMM;
3517	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3518	PGVM pGVM;
3519	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3520	if (RT_FAILURE(rc))
3521	return rc;
3522
3523	/*
3524	* Take the semaphore and do some more validations.
3525	*/
3526	gmmR0MutexAcquire(pGMM);
3527	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3528	{
3529	switch (enmAction)
3530	{
3531	case GMMBALLOONACTION_INFLATE:
3532	{
3533	if (RT_LIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cBalloonedPages <= pGVM->gmm.s.Reserved.cBasePages))
3534	{
3535	/*
3536	* Record the ballooned memory.
3537	*/
3538	pGMM->cBalloonedPages += cBalloonedPages;
3539	if (pGVM->gmm.s.cReqBalloonedPages)
3540	{
3541	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
3542	AssertFailed();
3543
3544	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3545	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
3546	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
3547	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
3548	}
3549	else
3550	{
3551	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3552	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3553	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3554	}
3555	}
3556	else
3557	{
3558	Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
3559	pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cBalloonedPages, pGVM->gmm.s.Reserved.cBasePages));
3560	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3561	}
3562	break;
3563	}
3564
3565	case GMMBALLOONACTION_DEFLATE:
3566	{
3567	/* Deflate. */
3568	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
3569	{
3570	/*
3571	* Record the ballooned memory.
3572	*/
3573	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3574	pGMM->cBalloonedPages -= cBalloonedPages;
3575	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3576	if (pGVM->gmm.s.cReqDeflatePages)
3577	{
3578	AssertFailed(); /* This is path is for later. */
3579	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3580	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3581
3582	/*
3583	* Anything we need to do here now when the request has been completed?
3584	*/
3585	pGVM->gmm.s.cReqDeflatePages = 0;
3586	}
3587	else
3588	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3589	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3590	}
3591	else
3592	{
3593	Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.cBalloonedPages, cBalloonedPages));
3594	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3595	}
3596	break;
3597	}
3598
3599	case GMMBALLOONACTION_RESET:
3600	{
3601	/* Reset to an empty balloon. */
3602	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3603
3604	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3605	pGVM->gmm.s.cBalloonedPages = 0;
3606	break;
3607	}
3608
3609	default:
3610	rc = VERR_INVALID_PARAMETER;
3611	break;
3612	}
3613	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3614	}
3615	else
3616	rc = VERR_INTERNAL_ERROR_5;
3617
3618	gmmR0MutexRelease(pGMM);
3619	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3620	return rc;
3621	}
3622
3623
3624	/**
3625	* VMMR0 request wrapper for GMMR0BalloonedPages.
3626	*
3627	* @returns see GMMR0BalloonedPages.
3628	* @param pVM Pointer to the shared VM structure.
3629	* @param idCpu VCPU id
3630	* @param pReq The request packet.
3631	*/
3632	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3633	{
3634	/*
3635	* Validate input and pass it on.
3636	*/
3637	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3638	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3639	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3640	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3641	VERR_INVALID_PARAMETER);
3642
3643	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3644	}
3645
3646	/**
3647	* Return memory statistics for the hypervisor
3648	*
3649	* @returns VBox status code:
3650	* @param pVM Pointer to the shared VM structure.
3651	* @param pReq The request packet.
3652	*/
3653	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3654	{
3655	/*
3656	* Validate input and pass it on.
3657	*/
3658	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3659	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3660	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3661	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3662	VERR_INVALID_PARAMETER);
3663
3664	/*
3665	* Validate input and get the basics.
3666	*/
3667	PGMM pGMM;
3668	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3669	pReq->cAllocPages = pGMM->cAllocatedPages;
3670	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3671	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3672	pReq->cMaxPages = pGMM->cMaxPages;
3673	pReq->cSharedPages = pGMM->cDuplicatePages;
3674	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3675
3676	return VINF_SUCCESS;
3677	}
3678
3679	/**
3680	* Return memory statistics for the VM
3681	*
3682	* @returns VBox status code:
3683	* @param pVM Pointer to the shared VM structure.
3684	* @parma idCpu Cpu id.
3685	* @param pReq The request packet.
3686	*/
3687	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3688	{
3689	/*
3690	* Validate input and pass it on.
3691	*/
3692	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3693	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3694	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3695	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3696	VERR_INVALID_PARAMETER);
3697
3698	/*
3699	* Validate input and get the basics.
3700	*/
3701	PGMM pGMM;
3702	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3703	PGVM pGVM;
3704	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3705	if (RT_FAILURE(rc))
3706	return rc;
3707
3708	/*
3709	* Take the semaphore and do some more validations.
3710	*/
3711	gmmR0MutexAcquire(pGMM);
3712	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3713	{
3714	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3715	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3716	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3717	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3718	}
3719	else
3720	rc = VERR_INTERNAL_ERROR_5;
3721
3722	gmmR0MutexRelease(pGMM);
3723	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3724	return rc;
3725	}
3726
3727
3728	/**
3729	* Worker for gmmR0UnmapChunk and gmmr0FreeChunk.
3730	*
3731	* Don't call this in legacy allocation mode!
3732	*
3733	* @returns VBox status code.
3734	* @param pGMM Pointer to the GMM instance data.
3735	* @param pGVM Pointer to the Global VM structure.
3736	* @param pChunk Pointer to the chunk to be unmapped.
3737	*/
3738	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3739	{
3740	Assert(!pGMM->fLegacyAllocationMode);
3741
3742	/*
3743	* Find the mapping and try unmapping it.
3744	*/
3745	uint32_t cMappings = pChunk->cMappingsX;
3746	for (uint32_t i = 0; i < cMappings; i++)
3747	{
3748	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3749	if (pChunk->paMappingsX[i].pGVM == pGVM)
3750	{
3751	/* unmap */
3752	int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
3753	if (RT_SUCCESS(rc))
3754	{
3755	/* update the record. */
3756	cMappings--;
3757	if (i < cMappings)
3758	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
3759	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
3760	pChunk->paMappingsX[cMappings].pGVM = NULL;
3761	Assert(pChunk->cMappingsX - 1U == cMappings);
3762	pChunk->cMappingsX = cMappings;
3763	}
3764
3765	return rc;
3766	}
3767	}
3768
3769	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3770	return VERR_GMM_CHUNK_NOT_MAPPED;
3771	}
3772
3773
3774	/**
3775	* Unmaps a chunk previously mapped into the address space of the current process.
3776	*
3777	* @returns VBox status code.
3778	* @param pGMM Pointer to the GMM instance data.
3779	* @param pGVM Pointer to the Global VM structure.
3780	* @param pChunk Pointer to the chunk to be unmapped.
3781	*/
3782	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3783	{
3784	if (!pGMM->fLegacyAllocationMode)
3785	{
3786	/*
3787	* Lock the chunk and if possible leave the giant GMM lock.
3788	*/
3789	GMMR0CHUNKMTXSTATE MtxState;
3790	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3791	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3792	if (RT_SUCCESS(rc))
3793	{
3794	rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3795	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3796	}
3797	return rc;
3798	}
3799
3800	if (pChunk->hGVM == pGVM->hSelf)
3801	return VINF_SUCCESS;
3802
3803	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3804	return VERR_GMM_CHUNK_NOT_MAPPED;
3805	}
3806
3807
3808	/**
3809	* Worker for gmmR0MapChunk.
3810	*
3811	* @returns VBox status code.
3812	* @param pGMM Pointer to the GMM instance data.
3813	* @param pGVM Pointer to the Global VM structure.
3814	* @param pChunk Pointer to the chunk to be mapped.
3815	* @param ppvR3 Where to store the ring-3 address of the mapping.
3816	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3817	* contain the address of the existing mapping.
3818	*/
3819	static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3820	{
3821	/*
3822	* If we're in legacy mode this is simple.
3823	*/
3824	if (pGMM->fLegacyAllocationMode)
3825	{
3826	if (pChunk->hGVM != pGVM->hSelf)
3827	{
3828	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3829	return VERR_GMM_CHUNK_NOT_FOUND;
3830	}
3831
3832	*ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
3833	return VINF_SUCCESS;
3834	}
3835
3836	/*
3837	* Check to see if the chunk is already mapped.
3838	*/
3839	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3840	{
3841	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3842	if (pChunk->paMappingsX[i].pGVM == pGVM)
3843	{
3844	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3845	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3846	#ifdef VBOX_WITH_PAGE_SHARING
3847	/* The ring-3 chunk cache can be out of sync; don't fail. */
3848	return VINF_SUCCESS;
3849	#else
3850	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3851	#endif
3852	}
3853	}
3854
3855	/*
3856	* Do the mapping.
3857	*/
3858	RTR0MEMOBJ hMapObj;
3859	int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3860	if (RT_SUCCESS(rc))
3861	{
3862	/* reallocate the array? assumes few users per chunk (usually one). */
3863	unsigned iMapping = pChunk->cMappingsX;
3864	if ( iMapping <= 3
3865	\|\| (iMapping & 3) == 0)
3866	{
3867	unsigned cNewSize = iMapping <= 3
3868	? iMapping + 1
3869	: iMapping + 4;
3870	Assert(cNewSize < 4 \|\| RT_ALIGN_32(cNewSize, 4) == cNewSize);
3871	if (RT_UNLIKELY(cNewSize > UINT16_MAX))
3872	{
3873	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3874	return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
3875	}
3876
3877	void pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize sizeof(pChunk->paMappingsX[0]));
3878	if (RT_UNLIKELY(!pvMappings))
3879	{
3880	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3881	return VERR_NO_MEMORY;
3882	}
3883	pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
3884	}
3885
3886	/* insert new entry */
3887	pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
3888	pChunk->paMappingsX[iMapping].pGVM = pGVM;
3889	Assert(pChunk->cMappingsX == iMapping);
3890	pChunk->cMappingsX = iMapping + 1;
3891
3892	*ppvR3 = RTR0MemObjAddressR3(hMapObj);
3893	}
3894
3895	return rc;
3896	}
3897
3898
3899	/**
3900	* Maps a chunk into the user address space of the current process.
3901	*
3902	* @returns VBox status code.
3903	* @param pGMM Pointer to the GMM instance data.
3904	* @param pGVM Pointer to the Global VM structure.
3905	* @param pChunk Pointer to the chunk to be mapped.
3906	* @param fRelaxedSem Whether we can release the semaphore while doing the
3907	* mapping (@c true) or not.
3908	* @param ppvR3 Where to store the ring-3 address of the mapping.
3909	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3910	* contain the address of the existing mapping.
3911	*/
3912	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
3913	{
3914	/*
3915	* Take the chunk lock and leave the giant GMM lock when possible, then
3916	* call the worker function.
3917	*/
3918	GMMR0CHUNKMTXSTATE MtxState;
3919	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3920	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3921	if (RT_SUCCESS(rc))
3922	{
3923	rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
3924	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3925	}
3926
3927	return rc;
3928	}
3929
3930
3931
3932	/**
3933	* Check if a chunk is mapped into the specified VM
3934	*
3935	* @returns mapped yes/no
3936	* @param pGMM Pointer to the GMM instance.
3937	* @param pGVM Pointer to the Global VM structure.
3938	* @param pChunk Pointer to the chunk to be mapped.
3939	* @param ppvR3 Where to store the ring-3 address of the mapping.
3940	*/
3941	static int gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3942	{
3943	GMMR0CHUNKMTXSTATE MtxState;
3944	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3945	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3946	{
3947	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3948	if (pChunk->paMappingsX[i].pGVM == pGVM)
3949	{
3950	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3951	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3952	return true;
3953	}
3954	}
3955	*ppvR3 = NULL;
3956	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3957	return false;
3958	}
3959
3960
3961	/**
3962	* Map a chunk and/or unmap another chunk.
3963	*
3964	* The mapping and unmapping applies to the current process.
3965	*
3966	* This API does two things because it saves a kernel call per mapping when
3967	* when the ring-3 mapping cache is full.
3968	*
3969	* @returns VBox status code.
3970	* @param pVM The VM.
3971	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3972	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3973	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3974	* @thread EMT
3975	*/
3976	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3977	{
3978	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3979	pVM, idChunkMap, idChunkUnmap, ppvR3));
3980
3981	/*
3982	* Validate input and get the basics.
3983	*/
3984	PGMM pGMM;
3985	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3986	PGVM pGVM;
3987	int rc = GVMMR0ByVM(pVM, &pGVM);
3988	if (RT_FAILURE(rc))
3989	return rc;
3990
3991	AssertCompile(NIL_GMM_CHUNKID == 0);
3992	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3993	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3994
3995	if ( idChunkMap == NIL_GMM_CHUNKID
3996	&& idChunkUnmap == NIL_GMM_CHUNKID)
3997	return VERR_INVALID_PARAMETER;
3998
3999	if (idChunkMap != NIL_GMM_CHUNKID)
4000	{
4001	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
4002	*ppvR3 = NIL_RTR3PTR;
4003	}
4004
4005	/*
4006	* Take the semaphore and do the work.
4007	*
4008	* The unmapping is done last since it's easier to undo a mapping than
4009	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
4010	* that it pushes the user virtual address space to within a chunk of
4011	* it it's limits, so, no problem here.
4012	*/
4013	gmmR0MutexAcquire(pGMM);
4014	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4015	{
4016	PGMMCHUNK pMap = NULL;
4017	if (idChunkMap != NIL_GVM_HANDLE)
4018	{
4019	pMap = gmmR0GetChunk(pGMM, idChunkMap);
4020	if (RT_LIKELY(pMap))
4021	rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /fRelaxedSem/, ppvR3);
4022	else
4023	{
4024	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
4025	rc = VERR_GMM_CHUNK_NOT_FOUND;
4026	}
4027	}
4028	/** @todo split this operation, the bail out might (theoretcially) not be
4029	* entirely safe. */
4030
4031	if ( idChunkUnmap != NIL_GMM_CHUNKID
4032	&& RT_SUCCESS(rc))
4033	{
4034	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
4035	if (RT_LIKELY(pUnmap))
4036	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /fRelaxedSem/);
4037	else
4038	{
4039	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
4040	rc = VERR_GMM_CHUNK_NOT_FOUND;
4041	}
4042
4043	if (RT_FAILURE(rc) && pMap)
4044	gmmR0UnmapChunk(pGMM, pGVM, pMap, false /fRelaxedSem/);
4045	}
4046
4047	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4048	}
4049	else
4050	rc = VERR_INTERNAL_ERROR_5;
4051	gmmR0MutexRelease(pGMM);
4052
4053	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
4054	return rc;
4055	}
4056
4057
4058	/**
4059	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
4060	*
4061	* @returns see GMMR0MapUnmapChunk.
4062	* @param pVM Pointer to the shared VM structure.
4063	* @param pReq The request packet.
4064	*/
4065	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
4066	{
4067	/*
4068	* Validate input and pass it on.
4069	*/
4070	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4071	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4072	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4073
4074	return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
4075	}
4076
4077
4078	/**
4079	* Legacy mode API for supplying pages.
4080	*
4081	* The specified user address points to a allocation chunk sized block that
4082	* will be locked down and used by the GMM when the GM asks for pages.
4083	*
4084	* @returns VBox status code.
4085	* @param pVM The VM.
4086	* @param idCpu VCPU id
4087	* @param pvR3 Pointer to the chunk size memory block to lock down.
4088	*/
4089	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
4090	{
4091	/*
4092	* Validate input and get the basics.
4093	*/
4094	PGMM pGMM;
4095	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4096	PGVM pGVM;
4097	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4098	if (RT_FAILURE(rc))
4099	return rc;
4100
4101	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
4102	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
4103
4104	if (!pGMM->fLegacyAllocationMode)
4105	{
4106	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
4107	return VERR_NOT_SUPPORTED;
4108	}
4109
4110	/*
4111	* Lock the memory and add it as new chunk with our hGVM.
4112	* (The GMM locking is done inside gmmR0RegisterChunk.)
4113	*/
4114	RTR0MEMOBJ MemObj;
4115	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
4116	if (RT_SUCCESS(rc))
4117	{
4118	rc = gmmR0RegisterChunk(pGMM, &pGVM->gmm.s.Private, MemObj, pGVM->hSelf, 0 /fChunkFlags/, NULL);
4119	if (RT_SUCCESS(rc))
4120	gmmR0MutexRelease(pGMM);
4121	else
4122	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
4123	}
4124
4125	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
4126	return rc;
4127	}
4128
4129
4130	typedef struct
4131	{
4132	PAVLGCPTRNODECORE pNode;
4133	char *pszModuleName;
4134	char *pszVersion;
4135	VBOXOSFAMILY enmGuestOS;
4136	} GMMFINDMODULEBYNAME, *PGMMFINDMODULEBYNAME;
4137
4138	/**
4139	* Tree enumeration callback for finding identical modules by name and version
4140	*/
4141	DECLCALLBACK(int) gmmR0CheckForIdenticalModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4142	{
4143	PGMMFINDMODULEBYNAME pInfo = (PGMMFINDMODULEBYNAME)pvUser;
4144	PGMMSHAREDMODULE pModule = (PGMMSHAREDMODULE)pNode;
4145
4146	if ( pInfo
4147	&& pInfo->enmGuestOS == pModule->enmGuestOS
4148	/** @todo replace with RTStrNCmp */
4149	&& !strcmp(pModule->szName, pInfo->pszModuleName)
4150	&& !strcmp(pModule->szVersion, pInfo->pszVersion))
4151	{
4152	pInfo->pNode = pNode;
4153	return 1; /* stop search */
4154	}
4155	return 0;
4156	}
4157
4158
4159	/**
4160	* Registers a new shared module for the VM
4161	*
4162	* @returns VBox status code.
4163	* @param pVM VM handle
4164	* @param idCpu VCPU id
4165	* @param enmGuestOS Guest OS type
4166	* @param pszModuleName Module name
4167	* @param pszVersion Module version
4168	* @param GCBaseAddr Module base address
4169	* @param cbModule Module size
4170	* @param cRegions Number of shared region descriptors
4171	* @param pRegions Shared region(s)
4172	*/
4173	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
4174	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
4175	{
4176	#ifdef VBOX_WITH_PAGE_SHARING
4177	/*
4178	* Validate input and get the basics.
4179	*/
4180	PGMM pGMM;
4181	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4182	PGVM pGVM;
4183	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4184	if (RT_FAILURE(rc))
4185	return rc;
4186
4187	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4188
4189	/*
4190	* Take the semaphore and do some more validations.
4191	*/
4192	gmmR0MutexAcquire(pGMM);
4193	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4194	{
4195	bool fNewModule = false;
4196
4197	/* Check if this module is already locally registered. */
4198	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4199	if (!pRecVM)
4200	{
4201	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULEPERVM, aRegions[cRegions]));
4202	if (!pRecVM)
4203	{
4204	AssertFailed();
4205	rc = VERR_NO_MEMORY;
4206	goto end;
4207	}
4208	pRecVM->Core.Key = GCBaseAddr;
4209	pRecVM->cRegions = cRegions;
4210
4211	/* Save the region data as they can differ between VMs (address space scrambling or simply different loading order) */
4212	for (unsigned i = 0; i < cRegions; i++)
4213	{
4214	pRecVM->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4215	pRecVM->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4216	pRecVM->aRegions[i].u32Alignment = 0;
4217	pRecVM->aRegions[i].paHCPhysPageID = NULL; /* unused */
4218	}
4219
4220	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
4221	Assert(ret);
4222
4223	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
4224	fNewModule = true;
4225	}
4226	else
4227	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
4228
4229	/* Check if this module is already globally registered. */
4230	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
4231	if ( !pGlobalModule
4232	&& enmGuestOS == VBOXOSFAMILY_Windows64)
4233	{
4234	/* Two identical copies of e.g. Win7 x64 will typically not have a similar virtual address space layout for dlls or kernel modules.
4235	* Try to find identical binaries based on name and version.
4236	*/
4237	GMMFINDMODULEBYNAME Info;
4238
4239	Info.pNode = NULL;
4240	Info.pszVersion = pszVersion;
4241	Info.pszModuleName = pszModuleName;
4242	Info.enmGuestOS = enmGuestOS;
4243
4244	Log(("Try to find identical module %s\n", pszModuleName));
4245	int ret = RTAvlGCPtrDoWithAll(&pGMM->pGlobalSharedModuleTree, true /* fFromLeft */, gmmR0CheckForIdenticalModule, &Info);
4246	if (ret == 1)
4247	{
4248	Assert(Info.pNode);
4249	pGlobalModule = (PGMMSHAREDMODULE)Info.pNode;
4250	Log(("Found identical module at %RGv\n", pGlobalModule->Core.Key));
4251	}
4252	}
4253
4254	if (!pGlobalModule)
4255	{
4256	Assert(fNewModule);
4257	Assert(!pRecVM->fCollision);
4258
4259	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
4260	if (!pGlobalModule)
4261	{
4262	AssertFailed();
4263	rc = VERR_NO_MEMORY;
4264	goto end;
4265	}
4266
4267	pGlobalModule->Core.Key = GCBaseAddr;
4268	pGlobalModule->cbModule = cbModule;
4269	/* Input limit already safe; no need to check again. */
4270	/** @todo replace with RTStrCopy */
4271	strcpy(pGlobalModule->szName, pszModuleName);
4272	strcpy(pGlobalModule->szVersion, pszVersion);
4273
4274	pGlobalModule->enmGuestOS = enmGuestOS;
4275	pGlobalModule->cRegions = cRegions;
4276
4277	for (unsigned i = 0; i < cRegions; i++)
4278	{
4279	Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
4280	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4281	pGlobalModule->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4282	pGlobalModule->aRegions[i].u32Alignment = 0;
4283	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
4284	}
4285
4286	/* Save reference. */
4287	pRecVM->pGlobalModule = pGlobalModule;
4288	pRecVM->fCollision = false;
4289	pGlobalModule->cUsers++;
4290	rc = VINF_SUCCESS;
4291
4292	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
4293	Assert(ret);
4294
4295	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
4296	}
4297	else
4298	{
4299	Assert(pGlobalModule->cUsers > 0);
4300
4301	/* Make sure the name and version are identical. */
4302	/** @todo replace with RTStrNCmp */
4303	if ( !strcmp(pGlobalModule->szName, pszModuleName)
4304	&& !strcmp(pGlobalModule->szVersion, pszVersion))
4305	{
4306	/* Save reference. */
4307	pRecVM->pGlobalModule = pGlobalModule;
4308	if ( fNewModule
4309	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
4310	{
4311	pGlobalModule->cUsers++;
4312	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
4313	}
4314	pRecVM->fCollision = false;
4315	rc = VINF_SUCCESS;
4316	}
4317	else
4318	{
4319	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
4320	pRecVM->fCollision = true;
4321	rc = VINF_PGM_SHARED_MODULE_COLLISION;
4322	goto end;
4323	}
4324	}
4325
4326	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4327	}
4328	else
4329	rc = VERR_INTERNAL_ERROR_5;
4330
4331	end:
4332	gmmR0MutexRelease(pGMM);
4333	return rc;
4334	#else
4335	return VERR_NOT_IMPLEMENTED;
4336	#endif
4337	}
4338
4339
4340	/**
4341	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
4342	*
4343	* @returns see GMMR0RegisterSharedModule.
4344	* @param pVM Pointer to the shared VM structure.
4345	* @param idCpu VCPU id
4346	* @param pReq The request packet.
4347	*/
4348	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
4349	{
4350	/*
4351	* Validate input and pass it on.
4352	*/
4353	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4354	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4355	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4356
4357	/* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
4358	pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
4359	return VINF_SUCCESS;
4360	}
4361
4362
4363	/**
4364	* Unregisters a shared module for the VM
4365	*
4366	* @returns VBox status code.
4367	* @param pVM VM handle
4368	* @param idCpu VCPU id
4369	* @param pszModuleName Module name
4370	* @param pszVersion Module version
4371	* @param GCBaseAddr Module base address
4372	* @param cbModule Module size
4373	*/
4374	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
4375	{
4376	#ifdef VBOX_WITH_PAGE_SHARING
4377	/*
4378	* Validate input and get the basics.
4379	*/
4380	PGMM pGMM;
4381	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4382	PGVM pGVM;
4383	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4384	if (RT_FAILURE(rc))
4385	return rc;
4386
4387	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4388
4389	/*
4390	* Take the semaphore and do some more validations.
4391	*/
4392	gmmR0MutexAcquire(pGMM);
4393	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4394	{
4395	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4396	if (pRecVM)
4397	{
4398	/* Remove reference to global shared module. */
4399	if (!pRecVM->fCollision)
4400	{
4401	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4402	Assert(pRec);
4403
4404	if (pRec) /* paranoia */
4405	{
4406	Assert(pRec->cUsers);
4407	pRec->cUsers--;
4408	if (pRec->cUsers == 0)
4409	{
4410	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
4411	for (unsigned i = 0; i < pRec->cRegions; i++)
4412	if (pRec->aRegions[i].paHCPhysPageID)
4413	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4414
4415	Assert(pRec->Core.Key == GCBaseAddr \|\| pRec->enmGuestOS == VBOXOSFAMILY_Windows64);
4416	Assert(pRec->cRegions == pRecVM->cRegions);
4417	#ifdef VBOX_STRICT
4418	for (unsigned i = 0; i < pRecVM->cRegions; i++)
4419	{
4420	Assert(pRecVM->aRegions[i].GCRegionAddr == pRec->aRegions[i].GCRegionAddr);
4421	Assert(pRecVM->aRegions[i].cbRegion == pRec->aRegions[i].cbRegion);
4422	}
4423	#endif
4424
4425	/* Remove from the tree and free memory. */
4426	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4427	RTMemFree(pRec);
4428	}
4429	}
4430	else
4431	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
4432	}
4433	else
4434	Assert(!pRecVM->pGlobalModule);
4435
4436	/* Remove from the tree and free memory. */
4437	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4438	RTMemFree(pRecVM);
4439	}
4440	else
4441	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
4442
4443	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4444	}
4445	else
4446	rc = VERR_INTERNAL_ERROR_5;
4447
4448	gmmR0MutexRelease(pGMM);
4449	return rc;
4450	#else
4451	return VERR_NOT_IMPLEMENTED;
4452	#endif
4453	}
4454
4455
4456	/**
4457	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
4458	*
4459	* @returns see GMMR0UnregisterSharedModule.
4460	* @param pVM Pointer to the shared VM structure.
4461	* @param idCpu VCPU id
4462	* @param pReq The request packet.
4463	*/
4464	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
4465	{
4466	/*
4467	* Validate input and pass it on.
4468	*/
4469	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4470	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4471	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4472
4473	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
4474	}
4475
4476	#ifdef VBOX_WITH_PAGE_SHARING
4477
4478	/**
4479	* Increase the use count of a shared page, the page is known to exist and be valid and such.
4480	*
4481	* @param pGMM Pointer to the GMM instance.
4482	* @param pGVM Pointer to the GVM instance.
4483	* @param pPage The page structure.
4484	*/
4485	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
4486	{
4487	Assert(pGMM->cSharedPages > 0);
4488	Assert(pGMM->cAllocatedPages > 0);
4489
4490	pGMM->cDuplicatePages++;
4491
4492	pPage->Shared.cRefs++;
4493	pGVM->gmm.s.cSharedPages++;
4494	pGVM->gmm.s.Allocated.cBasePages++;
4495	}
4496
4497
4498	/**
4499	* Converts a private page to a shared page, the page is known to exist and be valid and such.
4500	*
4501	* @param pGMM Pointer to the GMM instance.
4502	* @param pGVM Pointer to the GVM instance.
4503	* @param HCPhys Host physical address
4504	* @param idPage The Page ID
4505	* @param pPage The page structure.
4506	*/
4507	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
4508	{
4509	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
4510	Assert(pChunk);
4511	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
4512	Assert(GMM_PAGE_IS_PRIVATE(pPage));
4513
4514	pChunk->cPrivate--;
4515	pChunk->cShared++;
4516
4517	pGMM->cSharedPages++;
4518
4519	pGVM->gmm.s.cSharedPages++;
4520	pGVM->gmm.s.cPrivatePages--;
4521
4522	/* Modify the page structure. */
4523	pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
4524	pPage->Shared.cRefs = 1;
4525	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
4526	}
4527
4528
4529	/**
4530	* Checks specified shared module range for changes
4531	*
4532	* Performs the following tasks:
4533	* - If a shared page is new, then it changes the GMM page type to shared and
4534	* returns it in the pPageDesc descriptor.
4535	* - If a shared page already exists, then it checks if the VM page is
4536	* identical and if so frees the VM page and returns the shared page in
4537	* pPageDesc descriptor.
4538	*
4539	* @remarks ASSUMES the caller has acquired the GMM semaphore!!
4540	*
4541	* @returns VBox status code.
4542	* @param pGMM Pointer to the GMM instance data.
4543	* @param pGVM Pointer to the GVM instance data.
4544	* @param pModule Module description
4545	* @param idxRegion Region index
4546	* @param idxPage Page index
4547	* @param paPageDesc Page descriptor
4548	*/
4549	GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned idxPage,
4550	PGMMSHAREDPAGEDESC pPageDesc)
4551	{
4552	int rc = VINF_SUCCESS;
4553	PGMM pGMM;
4554	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4555	unsigned cPages = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
4556
4557	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
4558	AssertReturn(idxPage < cPages, VERR_INVALID_PARAMETER);
4559
4560	LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
4561
4562	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
4563	if (!pGlobalRegion->paHCPhysPageID)
4564	{
4565	/* First time; create a page descriptor array. */
4566	Log(("Allocate page descriptor array for %d pages\n", cPages));
4567	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
4568	if (!pGlobalRegion->paHCPhysPageID)
4569	{
4570	AssertFailed();
4571	rc = VERR_NO_MEMORY;
4572	goto end;
4573	}
4574	/* Invalidate all descriptors. */
4575	for (unsigned i = 0; i < cPages; i++)
4576	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
4577	}
4578
4579	/* We've seen this shared page for the first time? */
4580	if (pGlobalRegion->paHCPhysPageID[idxPage] == NIL_GMM_PAGEID)
4581	{
4582	new_shared_page:
4583	Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
4584
4585	/* Easy case: just change the internal page type. */
4586	PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->uHCPhysPageId);
4587	if (!pPage)
4588	{
4589	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #1 (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x)\n",
4590	pPageDesc->uHCPhysPageId, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage));
4591	AssertFailed();
4592	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4593	goto end;
4594	}
4595
4596	AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->HCPhys, (pPage->Private.pfn << 12)));
4597
4598	gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pPage);
4599
4600	/* Keep track of these references. */
4601	pGlobalRegion->paHCPhysPageID[idxPage] = pPageDesc->uHCPhysPageId;
4602	}
4603	else
4604	{
4605	uint8_t pbLocalPage, pbSharedPage;
4606	uint8_t *pbChunk;
4607	PGMMCHUNK pChunk;
4608
4609	Assert(pPageDesc->uHCPhysPageId != pGlobalRegion->paHCPhysPageID[idxPage]);
4610
4611	Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pGlobalRegion->paHCPhysPageID[idxPage]));
4612
4613	/* Get the shared page source. */
4614	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[idxPage]);
4615	if (!pPage)
4616	{
4617	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #2 (idxRegion=%#x idxPage=%#x)\n",
4618	pPageDesc->uHCPhysPageId, idxRegion, idxPage));
4619	AssertFailed();
4620	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4621	goto end;
4622	}
4623	if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
4624	{
4625	/* Page was freed at some point; invalidate this entry. */
4626	/** @todo this isn't really bullet proof. */
4627	Log(("Old shared page was freed -> create a new one\n"));
4628	pGlobalRegion->paHCPhysPageID[idxPage] = NIL_GMM_PAGEID;
4629	goto new_shared_page; /* ugly goto */
4630	}
4631
4632	Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
4633
4634	/* Calculate the virtual address of the local page. */
4635	pChunk = gmmR0GetChunk(pGMM, pPageDesc->uHCPhysPageId >> GMM_CHUNKID_SHIFT);
4636	if (pChunk)
4637	{
4638	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4639	{
4640	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #3\n", pPageDesc->uHCPhysPageId));
4641	AssertFailed();
4642	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4643	goto end;
4644	}
4645	pbLocalPage = pbChunk + ((pPageDesc->uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4646	}
4647	else
4648	{
4649	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #4\n", pPageDesc->uHCPhysPageId));
4650	AssertFailed();
4651	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4652	goto end;
4653	}
4654
4655	/* Calculate the virtual address of the shared page. */
4656	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[idxPage] >> GMM_CHUNKID_SHIFT);
4657	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
4658
4659	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
4660	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4661	{
4662	Log(("Map chunk into process!\n"));
4663	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4664	if (rc != VINF_SUCCESS)
4665	{
4666	AssertRC(rc);
4667	goto end;
4668	}
4669	}
4670	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4671
4672	/** @todo write ASMMemComparePage. */
4673	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
4674	{
4675	Log(("Unexpected differences found between local and shared page; skip\n"));
4676	/* Signal to the caller that this one hasn't changed. */
4677	pPageDesc->uHCPhysPageId = NIL_GMM_PAGEID;
4678	goto end;
4679	}
4680
4681	/* Free the old local page. */
4682	GMMFREEPAGEDESC PageDesc;
4683
4684	PageDesc.idPage = pPageDesc->uHCPhysPageId;
4685	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
4686	AssertRCReturn(rc, rc);
4687
4688	gmmR0UseSharedPage(pGMM, pGVM, pPage);
4689
4690	/* Pass along the new physical address & page id. */
4691	pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
4692	pPageDesc->uHCPhysPageId = pGlobalRegion->paHCPhysPageID[idxPage];
4693	}
4694	end:
4695	return rc;
4696	}
4697
4698
4699	/**
4700	* RTAvlGCPtrDestroy callback.
4701	*
4702	* @returns 0 or VERR_INTERNAL_ERROR.
4703	* @param pNode The node to destroy.
4704	* @param pvGVM The GVM handle.
4705	*/
4706	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
4707	{
4708	PGVM pGVM = (PGVM)pvGVM;
4709	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
4710
4711	Assert(pRecVM->pGlobalModule \|\| pRecVM->fCollision);
4712	if (pRecVM->pGlobalModule)
4713	{
4714	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4715	AssertPtr(pRec);
4716	Assert(pRec->cUsers);
4717
4718	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
4719	pRec->cUsers--;
4720	if (pRec->cUsers == 0)
4721	{
4722	for (uint32_t i = 0; i < pRec->cRegions; i++)
4723	if (pRec->aRegions[i].paHCPhysPageID)
4724	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4725
4726	/* Remove from the tree and free memory. */
4727	PGMM pGMM;
4728	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4729	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4730	RTMemFree(pRec);
4731	}
4732	}
4733	RTMemFree(pRecVM);
4734	return 0;
4735	}
4736
4737
4738	/**
4739	* Used by GMMR0CleanupVM to clean up shared modules.
4740	*
4741	* This is called without taking the GMM lock so that it can be yielded as
4742	* needed here.
4743	*
4744	* @param pGMM The GMM handle.
4745	* @param pGVM The global VM handle.
4746	*/
4747	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
4748	{
4749	gmmR0MutexAcquire(pGMM);
4750	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
4751
4752	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4753
4754	gmmR0MutexRelease(pGMM);
4755	}
4756
4757	#endif /* VBOX_WITH_PAGE_SHARING */
4758
4759	/**
4760	* Removes all shared modules for the specified VM
4761	*
4762	* @returns VBox status code.
4763	* @param pVM VM handle
4764	* @param idCpu VCPU id
4765	*/
4766	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
4767	{
4768	#ifdef VBOX_WITH_PAGE_SHARING
4769	/*
4770	* Validate input and get the basics.
4771	*/
4772	PGMM pGMM;
4773	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4774	PGVM pGVM;
4775	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4776	if (RT_FAILURE(rc))
4777	return rc;
4778
4779	/*
4780	* Take the semaphore and do some more validations.
4781	*/
4782	gmmR0MutexAcquire(pGMM);
4783	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4784	{
4785	Log(("GMMR0ResetSharedModules\n"));
4786	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4787
4788	rc = VINF_SUCCESS;
4789	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4790	}
4791	else
4792	rc = VERR_INTERNAL_ERROR_5;
4793
4794	gmmR0MutexRelease(pGMM);
4795	return rc;
4796	#else
4797	return VERR_NOT_IMPLEMENTED;
4798	#endif
4799	}
4800
4801	#ifdef VBOX_WITH_PAGE_SHARING
4802
4803	typedef struct
4804	{
4805	PGVM pGVM;
4806	VMCPUID idCpu;
4807	int rc;
4808	} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4809
4810	/**
4811	* Tree enumeration callback for checking a shared module.
4812	*/
4813	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4814	{
4815	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4816	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4817	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4818
4819	if ( !pLocalModule->fCollision
4820	&& pGlobalModule)
4821	{
4822	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4823	pInfo->rc = PGMR0SharedModuleCheck(pInfo->pGVM->pVM, pInfo->pGVM, pInfo->idCpu, pGlobalModule, pLocalModule->cRegions, pLocalModule->aRegions);
4824	if (RT_FAILURE(pInfo->rc))
4825	return 1; /* stop enumeration. */
4826	}
4827	return 0;
4828	}
4829
4830	#endif /* VBOX_WITH_PAGE_SHARING */
4831	#ifdef DEBUG_sandervl
4832
4833	/**
4834	* Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4835	*
4836	* @returns VBox status code.
4837	* @param pVM VM handle
4838	*/
4839	GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4840	{
4841	/*
4842	* Validate input and get the basics.
4843	*/
4844	PGMM pGMM;
4845	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4846
4847	/*
4848	* Take the semaphore and do some more validations.
4849	*/
4850	gmmR0MutexAcquire(pGMM);
4851	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4852	rc = VERR_INTERNAL_ERROR_5;
4853	else
4854	rc = VINF_SUCCESS;
4855
4856	return rc;
4857	}
4858
4859	/**
4860	* Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4861	*
4862	* @returns VBox status code.
4863	* @param pVM VM handle
4864	*/
4865	GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4866	{
4867	/*
4868	* Validate input and get the basics.
4869	*/
4870	PGMM pGMM;
4871	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4872
4873	gmmR0MutexRelease(pGMM);
4874	return VINF_SUCCESS;
4875	}
4876
4877	#endif /* DEBUG_sandervl */
4878
4879	/**
4880	* Check all shared modules for the specified VM
4881	*
4882	* @returns VBox status code.
4883	* @param pVM VM handle
4884	* @param pVCpu VMCPU handle
4885	*/
4886	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4887	{
4888	#ifdef VBOX_WITH_PAGE_SHARING
4889	/*
4890	* Validate input and get the basics.
4891	*/
4892	PGMM pGMM;
4893	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4894	PGVM pGVM;
4895	int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4896	if (RT_FAILURE(rc))
4897	return rc;
4898
4899	# ifndef DEBUG_sandervl
4900	/*
4901	* Take the semaphore and do some more validations.
4902	*/
4903	gmmR0MutexAcquire(pGMM);
4904	# endif
4905	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4906	{
4907	GMMCHECKSHAREDMODULEINFO Info;
4908
4909	Log(("GMMR0CheckSharedModules\n"));
4910	Info.pGVM = pGVM;
4911	Info.idCpu = pVCpu->idCpu;
4912	Info.rc = VINF_SUCCESS;
4913
4914	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4915
4916	rc = Info.rc;
4917
4918	Log(("GMMR0CheckSharedModules done!\n"));
4919
4920	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4921	}
4922	else
4923	rc = VERR_INTERNAL_ERROR_5;
4924
4925	# ifndef DEBUG_sandervl
4926	gmmR0MutexRelease(pGMM);
4927	# endif
4928	return rc;
4929	#else
4930	return VERR_NOT_IMPLEMENTED;
4931	#endif
4932	}
4933
4934	#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
4935
4936	typedef struct
4937	{
4938	PGVM pGVM;
4939	PGMM pGMM;
4940	uint8_t *pSourcePage;
4941	bool fFoundDuplicate;
4942	} GMMFINDDUPPAGEINFO, *PGMMFINDDUPPAGEINFO;
4943
4944	/**
4945	* RTAvlU32DoWithAll callback.
4946	*
4947	* @returns 0
4948	* @param pNode The node to search.
4949	* @param pvInfo Pointer to the input parameters
4950	*/
4951	static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvInfo)
4952	{
4953	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
4954	PGMMFINDDUPPAGEINFO pInfo = (PGMMFINDDUPPAGEINFO)pvInfo;
4955	PGVM pGVM = pInfo->pGVM;
4956	PGMM pGMM = pInfo->pGMM;
4957	uint8_t *pbChunk;
4958
4959	/* Only take chunks not mapped into this VM process; not entirely correct. */
4960	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4961	{
4962	int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4963	if (RT_SUCCESS(rc))
4964	{
4965	/*
4966	* Look for duplicate pages
4967	*/
4968	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
4969	while (iPage-- > 0)
4970	{
4971	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
4972	{
4973	uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
4974
4975	if (!memcmp(pInfo->pSourcePage, pbDestPage, PAGE_SIZE))
4976	{
4977	pInfo->fFoundDuplicate = true;
4978	break;
4979	}
4980	}
4981	}
4982	gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/);
4983	}
4984	}
4985	return pInfo->fFoundDuplicate; /* (stops search if true) */
4986	}
4987
4988
4989	/**
4990	* Find a duplicate of the specified page in other active VMs
4991	*
4992	* @returns VBox status code.
4993	* @param pVM VM handle
4994	* @param pReq Request packet
4995	*/
4996	GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
4997	{
4998	/*
4999	* Validate input and pass it on.
5000	*/
5001	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
5002	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
5003	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
5004
5005	PGMM pGMM;
5006	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
5007
5008	PGVM pGVM;
5009	int rc = GVMMR0ByVM(pVM, &pGVM);
5010	if (RT_FAILURE(rc))
5011	return rc;
5012
5013	/*
5014	* Take the semaphore and do some more validations.
5015	*/
5016	rc = gmmR0MutexAcquire(pGMM);
5017	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
5018	{
5019	uint8_t *pbChunk;
5020	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
5021	if (pChunk)
5022	{
5023	if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
5024	{
5025	uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
5026	PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
5027	if (pPage)
5028	{
5029	GMMFINDDUPPAGEINFO Info;
5030	Info.pGVM = pGVM;
5031	Info.pGMM = pGMM;
5032	Info.pSourcePage = pbSourcePage;
5033	Info.fFoundDuplicate = false;
5034	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Info);
5035
5036	pReq->fDuplicate = Info.fFoundDuplicate;
5037	}
5038	else
5039	{
5040	AssertFailed();
5041	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
5042	}
5043	}
5044	else
5045	AssertFailed();
5046	}
5047	else
5048	AssertFailed();
5049	}
5050	else
5051	rc = VERR_INTERNAL_ERROR_5;
5052
5053	gmmR0MutexRelease(pGMM);
5054	return rc;
5055	}
5056
5057	#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
5058

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 37803

Download in other formats: