GMMR0.cpp@ 37529

Last change on this file since 37529 was 37251, checked in by vboxsync, 14 years ago
GMMR0: Removed unused code and moved some functions around.
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 174.7 KB

Line
1	/* $Id: GMMR0.cpp 37251 2011-05-30 10:54:45Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007-2011 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structure must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given a unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages needs to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-7 free pages, the second covers 8-15, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in a out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/rawpci.h>
154	#include <VBox/vmm/vm.h>
155	#include <VBox/vmm/gmm.h>
156	#include "GMMR0Internal.h"
157	#include <VBox/vmm/gvm.h>
158	#include <VBox/vmm/pgm.h>
159	#include <VBox/log.h>
160	#include <VBox/param.h>
161	#include <VBox/err.h>
162	#include <iprt/asm.h>
163	#include <iprt/avl.h>
164	#include <iprt/list.h>
165	#include <iprt/mem.h>
166	#include <iprt/memobj.h>
167	#include <iprt/mp.h>
168	#include <iprt/semaphore.h>
169	#include <iprt/string.h>
170	#include <iprt/time.h>
171
172
173	/*******************************************************************************
174	* Structures and Typedefs *
175	*******************************************************************************/
176	/** Pointer to set of free chunks. */
177	typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
178
179	/**
180	* The per-page tracking structure employed by the GMM.
181	*
182	* On 32-bit hosts some trickery is necessary to compress all the information
183	* into 32 bits: Private.fZero occupies the position of the upper u2State bit (assuming bitfields are packed in declaration order),
184	* so a page is private when that bit is clear; when it is set, the lower state bit tells shared (2) from free (3).
185	*
186	* Because of the different layout on 32-bit and 64-bit hosts, macros
187	* are used to get and set some of the data.
188	*/
189	typedef union GMMPAGE
190	{
191	#if HC_ARCH_BITS == 64
192	/** Unsigned integer view of the whole 64-bit entry. */
193	uint64_t u;
194
195	/** The common view; only the state field is meaningful for every page kind. */
196	struct GMMPAGECOMMON
197	{
198	uint32_t uStuff1 : 32;
199	uint32_t uStuff2 : 30;
200	/** The page state (GMM_PAGE_STATE_XXX). */
201	uint32_t u2State : 2;
202	} Common;
203
204	/** The view of a private page. */
205	struct GMMPAGEPRIVATE
206	{
207	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
208	uint32_t pfn;
209	/** The GVM handle. (64K VMs) */
210	uint32_t hGVM : 16;
211	/** Reserved. Note that the field is 14 bits wide despite the u16 prefix. */
212	uint32_t u16Reserved : 14;
213	/** The page state (GMM_PAGE_STATE_XXX). */
214	uint32_t u2State : 2;
215	} Private;
216
217	/** The view of a shared page. */
218	struct GMMPAGESHARED
219	{
220	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
221	uint32_t pfn;
222	/** The reference count (64K VMs). */
223	uint32_t cRefs : 16;
224	/** Reserved. Checksum or something? Two hGVMs for forking? */
225	uint32_t u14Reserved : 14;
226	/** The page state (GMM_PAGE_STATE_XXX). */
227	uint32_t u2State : 2;
228	} Shared;
229
230	/** The view of a free page. */
231	struct GMMPAGEFREE
232	{
233	/** The index of the next page in the free list. UINT16_MAX is NIL. */
234	uint16_t iNext;
235	/** Reserved. Checksum or something? */
236	uint16_t u16Reserved0;
237	/** Reserved. Checksum or something? */
238	uint32_t u30Reserved1 : 30;
239	/** The page state (GMM_PAGE_STATE_XXX). */
240	uint32_t u2State : 2;
241	} Free;
242
243	#else /* 32-bit */
244	/** Unsigned integer view of the whole 32-bit entry. */
245	uint32_t u;
246
247	/** The common view; only the state field is meaningful for every page kind. */
248	struct GMMPAGECOMMON
249	{
250	uint32_t uStuff : 30;
251	/** The page state (GMM_PAGE_STATE_XXX). */
252	uint32_t u2State : 2;
253	} Common;
254
255	/** The view of a private page. */
256	struct GMMPAGEPRIVATE
257	{
258	/** The guest page frame number. (Max addressable: 2 ^ 36) */
259	uint32_t pfn : 24;
260	/** The GVM handle. (127 VMs) */
261	uint32_t hGVM : 7;
262	/** The top page state bit, MBZ (must be zero) for private pages; this is what GMM_PAGE_IS_PRIVATE tests on 32-bit hosts. */
263	uint32_t fZero : 1;
264	} Private;
265
266	/** The view of a shared page. */
267	struct GMMPAGESHARED
268	{
269	/** The reference count. */
270	uint32_t cRefs : 30;
271	/** The page state (GMM_PAGE_STATE_XXX). */
272	uint32_t u2State : 2;
273	} Shared;
274
275	/** The view of a free page. */
276	struct GMMPAGEFREE
277	{
278	/** The index of the next page in the free list. UINT16_MAX is NIL. */
279	uint32_t iNext : 16;
280	/** Reserved. Checksum or something? */
281	uint32_t u14Reserved : 14;
282	/** The page state (GMM_PAGE_STATE_XXX). */
283	uint32_t u2State : 2;
284	} Free;
285	#endif
286	} GMMPAGE;
287	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
288	/** Pointer to a GMMPAGE. */
289	typedef GMMPAGE *PGMMPAGE;
290
291
292	/** @name The Page States.
293	* @{ */
294	/** A private page. */
295	#define GMM_PAGE_STATE_PRIVATE 0
296	/** A private page - alternative value used on the 32-bit implementation.
297	* This will never be used on 64-bit hosts. */
298	#define GMM_PAGE_STATE_PRIVATE_32 1
299	/** A shared page. */
300	#define GMM_PAGE_STATE_SHARED 2
301	/** A free page. */
302	#define GMM_PAGE_STATE_FREE 3
303	/** @} */
304
305
306	/** @def GMM_PAGE_IS_PRIVATE
307	* On 64-bit hosts the state field is compared with GMM_PAGE_STATE_PRIVATE; on 32-bit hosts only Private.fZero is tested.
308	* @returns true if private, false if not.
309	* @param pPage Pointer to the GMM page (GMMPAGE).
310	*/
311	#if HC_ARCH_BITS == 64
312	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
313	#else
314	# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
315	#endif
316
317	/** @def GMM_PAGE_IS_SHARED
318	* Compares the common state field with GMM_PAGE_STATE_SHARED.
319	* @returns true if shared, false if not.
320	* @param pPage Pointer to the GMM page (GMMPAGE).
321	*/
322	#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
323
324	/** @def GMM_PAGE_IS_FREE
325	* Compares the common state field with GMM_PAGE_STATE_FREE.
326	* @returns true if free, false if not.
327	* @param pPage Pointer to the GMM page (GMMPAGE).
328	*/
329	#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
330
331	/** @def GMM_PAGE_PFN_LAST
332	* The last valid guest pfn range.
333	* @remark Some of the values outside the range has special meaning,
334	* see GMM_PAGE_PFN_UNSHAREABLE.
335	*/
336	#if HC_ARCH_BITS == 64
337	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
338	#else
339	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
340	#endif
341	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
342
343	/** @def GMM_PAGE_PFN_UNSHAREABLE
344	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
345	*/
346	#if HC_ARCH_BITS == 64
347	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
348	#else
349	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
350	#endif
351	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
352
353
354	/**
355	* A GMM allocation chunk ring-3 mapping record.
356	*
357	* This should really be associated with a session and not a VM, but
358	* it's simpler to associate it with a VM and clean up when the VM object
359	* is destroyed.
360	*/
361	typedef struct GMMCHUNKMAP
362	{
363	/** The mapping object. */
364	RTR0MEMOBJ hMapObj;
365	/** The VM owning the mapping. */
366	PGVM pGVM;
367	} GMMCHUNKMAP;
368	/** Pointer to a GMM allocation chunk mapping. */
369	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
370
371
372	/**
373	* A GMM allocation chunk.
374	*/
375	typedef struct GMMCHUNK
376	{
377	/** The AVL node core.
378	* The Key is the chunk ID. (Giant mtx.) */
379	AVLU32NODECORE Core;
380	/** The memory object.
381	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
382	* what the host can dish up. (Chunk mtx protects mapping accesses
383	* and related frees.) */
384	RTR0MEMOBJ hMemObj;
385	/** Pointer to the next chunk in the free list. (Giant mtx.) */
386	PGMMCHUNK pFreeNext;
387	/** Pointer to the previous chunk in the free list. (Giant mtx.) */
388	PGMMCHUNK pFreePrev;
389	/** Pointer to the free set this chunk belongs to. NULL for
390	* chunks with no free pages. (Giant mtx.) */
391	PGMMCHUNKFREESET pSet;
392	/** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
393	RTLISTNODE ListNode;
394	/** Pointer to an array of mappings. (Chunk mtx.) */
395	PGMMCHUNKMAP paMappingsX;
396	/** The number of mappings. (Chunk mtx.) */
397	uint16_t cMappingsX;
398	/** The index of the mapping lock this chunk is using. UINT8_MAX if nobody is
399	* mapping or freeing anything (the field is 8 bits wide and gmmR0ChunkMutexAcquire tests for UINT8_MAX). (Giant mtx.) */
400	uint8_t volatile iChunkMtx;
401	/** Flags field reserved for future use (like eliminating enmType).
402	* Presumably holds GMM_CHUNK_FLAGS_XXX - the users are not visible in this part of the file. (Giant mtx.) */
403	uint8_t fFlags;
404	/** The head of the list of free pages. UINT16_MAX is the NIL value.
405	* (Giant mtx.) */
406	uint16_t iFreeHead;
407	/** The number of free pages. (Giant mtx.) */
408	uint16_t cFree;
409	/** The GVM handle of the VM that first allocated pages from this chunk, this
410	* is used as a preference when there are several chunks to choose from.
411	* When in bound memory mode this isn't a preference any longer. (Giant
412	* mtx.) */
413	uint16_t hGVM;
414	/** The ID of the NUMA node the memory mostly resides on. (Reserved for
415	* future use.) (Giant mtx.) */
416	uint16_t idNumaNode;
417	/** The number of private pages. (Giant mtx.) */
418	uint16_t cPrivate;
419	/** The number of shared pages. (Giant mtx.) */
420	uint16_t cShared;
421	/** The pages, one entry per page in the chunk. (Giant mtx.) */
422	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
423	} GMMCHUNK;
424
425	/** Indicates that the NUMA properties of the memory are unknown. */
426	#define GMM_CHUNK_NUMA_ID_UNKNOWN UINT16_C(0xfffe)
427
428	/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
429	* @{ */
430	/** Indicates that the chunk is a large page (2MB). Declared as a 16-bit constant although GMMCHUNK::fFlags is 8 bits wide; the value fits. */
431	#define GMM_CHUNK_FLAGS_LARGE_PAGE UINT16_C(0x0001)
432	/** @} */
433
434
435	/**
436	* An allocation chunk TLB entry.
437	*/
438	typedef struct GMMCHUNKTLBE
439	{
440	/** The chunk id. GMMR0Init sets this to NIL_GMM_CHUNKID for every entry. */
441	uint32_t idChunk;
442	/** Pointer to the chunk. */
443	PGMMCHUNK pChunk;
444	} GMMCHUNKTLBE;
445	/** Pointer to an allocation chunk TLB entry. */
446	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
447
448
449	/** The number of entries in the allocation chunk TLB. Must be a power of two because GMM_CHUNKTLB_IDX masks with (entries - 1). */
450	#define GMM_CHUNKTLB_ENTRIES 32
451	/** Gets the TLB entry index for the given Chunk ID (the low bits of the ID). */
452	#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
453
454	/**
455	* An allocation chunk TLB.
456	*/
457	typedef struct GMMCHUNKTLB
458	{
459	/** The TLB entries, indexed by GMM_CHUNKTLB_IDX. */
460	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
461	} GMMCHUNKTLB;
462	/** Pointer to an allocation chunk TLB. */
463	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
464
465
466	/**
467	* The GMM instance data.
468	*/
469	typedef struct GMM
470	{
471	/** Magic / eye catcher. GMM_MAGIC */
472	uint32_t u32Magic;
473	/** The number of threads waiting on the giant mutex (hMtx); adjusted atomically around the mutex requests. */
474	uint32_t cMtxContenders;
475	/** The fast mutex protecting the GMM (the "giant" mutex).
476	* More fine grained locking can be implemented later if necessary. */
477	RTSEMFASTMUTEX hMtx;
478	#ifdef VBOX_STRICT
479	/** The current giant mutex owner (strict builds only). */
480	RTNATIVETHREAD hMtxOwner;
481	#endif
482	/** The chunk tree. */
483	PAVLU32NODECORE pChunks;
484	/** The chunk TLB. */
485	GMMCHUNKTLB ChunkTLB;
486	/** The private free set. */
487	GMMCHUNKFREESET PrivateX;
488	/** The shared free set. */
489	GMMCHUNKFREESET Shared;
490
491	/** Shared module tree (global). */
492	/** @todo separate trees for distinctly different guest OSes. */
493	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
494
495	/** The chunk list. For simplifying the cleanup process. */
496	RTLISTNODE ChunkList;
497
498	/** The maximum number of pages we're allowed to allocate (GMMR0Init currently sets it to UINT32_MAX).
499	* @gcfgm 64-bit GMM/MaxPages Direct.
500	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
501	uint64_t cMaxPages;
502	/** The number of pages that have been reserved.
503	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
504	uint64_t cReservedPages;
505	/** The number of pages that we have over-committed in reservations. */
506	uint64_t cOverCommittedPages;
507	/** The number of actually allocated (committed if you like) pages. */
508	uint64_t cAllocatedPages;
509	/** The number of pages that are shared. A subset of cAllocatedPages. */
510	uint64_t cSharedPages;
511	/** The number of pages that are actually shared between VMs. */
512	uint64_t cDuplicatePages;
513	/** The number of shared pages that have been left behind by
514	* VMs not doing proper cleanups. */
515	uint64_t cLeftBehindSharedPages;
516	/** The number of allocation chunks.
517	* (The number of pages we've allocated from the host can be derived from this.) */
518	uint32_t cChunks;
519	/** The number of currently ballooned pages. */
520	uint64_t cBalloonedPages;
521
522	/** The legacy allocation mode indicator.
523	* This is determined at initialization time. */
524	bool fLegacyAllocationMode;
525	/** The bound memory mode indicator.
526	* When set, the memory will be bound to a specific VM and never
527	* shared. This is always set if fLegacyAllocationMode is set.
528	* (Also determined at initialization time.) */
529	bool fBoundMemoryMode;
530	/** The number of registered VMs. */
531	uint16_t cRegisteredVMs;
532
533	/** The number of freed chunks ever. This is used as a list generation to
534	* avoid restarting the cleanup scanning when the list wasn't modified. */
535	uint32_t cFreedChunks;
536	/** The previous allocated Chunk ID.
537	* Used as a hint to avoid scanning the whole bitmap. */
538	uint32_t idChunkPrev;
539	/** Chunk ID allocation bitmap.
540	* Bits of allocated IDs are set, free ones are clear.
541	* The NIL id (0) is marked allocated. */
542	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
543
544	/** The index of the next chunk mutex to hand out; reduced modulo the aChunkMtx size when used. */
545	uint32_t iNextChunkMtx;
546	/** Chunk locks for reducing lock contention without having to allocate
547	* one lock per chunk. */
548	struct
549	{
550	/** The mutex. */
551	RTSEMFASTMUTEX hMtx;
552	/** The number of threads currently using this mutex. */
553	uint32_t volatile cUsers;
554	} aChunkMtx[64];
555	} GMM;
556	/** Pointer to the GMM instance. */
557	typedef GMM *PGMM;
558
559	/** The value of GMM::u32Magic (Katsuhiro Otomo). */
560	#define GMM_MAGIC UINT32_C(0x19540414)
561
562
563	/**
564	* GMM chunk mutex state.
565	*
566	* This is filled in by gmmR0ChunkMutexAcquire and is used by the other
567	* gmmR0ChunkMutex* methods.
568	*/
569	typedef struct GMMR0CHUNKMTXSTATE
570	{
571	PGMM pGMM; /**< Pointer to the GMM instance the state was acquired for. */
572	/** The index of the chunk mutex. */
573	uint8_t iChunkMtx;
574	/** The relevant flags (GMMR0CHUNK_MTX_XXX), stored truncated to 8 bits. */
575	uint8_t fFlags;
576	} GMMR0CHUNKMTXSTATE;
577	/** Pointer to a chunk mutex state. */
578	typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
579
580	/** @name GMMR0CHUNK_MTX_XXX - giant lock handling options; gmmR0ChunkMutexAcquire asserts INVALID < value < END.
581	* @{ */
582	#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
583	#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
584	#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
585	#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
586	#define GMMR0CHUNK_MTX_END UINT32_C(4)
587	/** @} */
588
589
590	/*******************************************************************************
591	* Global Variables *
592	*******************************************************************************/
593	/** Pointer to the GMM instance data. */
594	static PGMM g_pGMM = NULL;
595
596	/** Macro for obtaining and validating the g_pGMM pointer.
597	* On failure it will return from the invoking function with the specified return value.
598	*
599	* @param pGMM The name of the pGMM variable.
600	* @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
601	* VBox status codes.
602	*/
603	#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
604	do { \
605	(pGMM) = g_pGMM; \
606	AssertPtrReturn((pGMM), (rc)); \
607	AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
608	} while (0)
609
610	/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
611	* On failure it will return from the invoking function.
612	*
613	* @param pGMM The name of the pGMM variable.
614	*/
615	#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
616	do { \
617	(pGMM) = g_pGMM; \
618	AssertPtrReturnVoid((pGMM)); \
619	AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
620	} while (0)
621
622
623	/** @def GMM_CHECK_SANITY_UPON_ENTERING
624	* Checks the sanity of the GMM instance data before making changes.
625	*
626	* This macro is a stub by default and must be enabled manually in the code.
627	*
628	* @returns true if sane, false if not.
629	* @param pGMM The name of the pGMM variable.
630	*/
631	#if defined(VBOX_STRICT) && 0
632	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
633	#else
634	# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
635	#endif
636
637	/** @def GMM_CHECK_SANITY_UPON_LEAVING
638	* Checks the sanity of the GMM instance data after making changes.
639	*
640	* This macro is a stub by default and must be enabled manually in the code.
641	*
642	* @returns true if sane, false if not.
643	* @param pGMM The name of the pGMM variable.
644	*/
645	#if defined(VBOX_STRICT) && 0
646	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
647	#else
648	# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
649	#endif
650
651	/** @def GMM_CHECK_SANITY_IN_LOOPS
652	* Checks the sanity of the GMM instance in the allocation loops.
653	*
654	* This macro is a stub by default and must be enabled manually in the code.
655	*
656	* @returns true if sane, false if not.
657	* @param pGMM The name of the pGMM variable.
658	*/
659	#if defined(VBOX_STRICT) && 0
660	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
661	#else
662	# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
663	#endif
664
665
666	/*******************************************************************************
667	* Internal Functions *
668	*******************************************************************************/
669	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
670	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
671	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
672	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
673	DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
674	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
675	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
676	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
677	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
678	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
679	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
680
681
682
683	/**
684	* Initializes the GMM component.
685	*
686	* This is called when the VMMR0.r0 module is loaded and protected by the
687	* loader semaphore.
688	*
689	* @returns VBox status code.
690	*/
691	GMMR0DECL(int) GMMR0Init(void)
692	{
693	LogFlow(("GMMInit:\n"));
694
695	/*
696	* Allocate the instance data and the locks.
697	*/
698	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
699	if (!pGMM)
700	return VERR_NO_MEMORY;
701
702	pGMM->u32Magic = GMM_MAGIC;
703	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
704	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
705	RTListInit(&pGMM->ChunkList);
706	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
707
708	int rc = RTSemFastMutexCreate(&pGMM->hMtx);
709	if (RT_SUCCESS(rc))
710	{
711	unsigned iMtx;
712	for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
713	{
714	rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
715	if (RT_FAILURE(rc))
716	break;
717	}
718	if (RT_SUCCESS(rc))
719	{
720	/*
721	* Check and see if RTR0MemObjAllocPhysNC works.
722	*/
723	#if 0 /* later, see #3170. */
724	RTR0MEMOBJ MemObj;
725	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
726	if (RT_SUCCESS(rc))
727	{
728	rc = RTR0MemObjFree(MemObj, true);
729	AssertRC(rc);
730	}
731	else if (rc == VERR_NOT_SUPPORTED)
732	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
733	else
734	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
735	#else
736	# if defined(RT_OS_WINDOWS) \|\| (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
737	pGMM->fLegacyAllocationMode = false;
738	# if ARCH_BITS == 32
739	/* Don't reuse possibly partial chunks because of the virtual
740	address space limitation. */
741	pGMM->fBoundMemoryMode = true;
742	# else
743	pGMM->fBoundMemoryMode = false;
744	# endif
745	# else
746	pGMM->fLegacyAllocationMode = true;
747	pGMM->fBoundMemoryMode = true;
748	# endif
749	#endif
750
751	/*
752	* Query system page count and guess a reasonable cMaxPages value.
753	*/
754	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
755
756	g_pGMM = pGMM;
757	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
758	return VINF_SUCCESS;
759	}
760
761	/*
762	* Bail out.
763	*/
764	while (iMtx-- > 0)
765	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
766	RTSemFastMutexDestroy(pGMM->hMtx);
767	}
768
769	pGMM->u32Magic = 0;
770	RTMemFree(pGMM);
771	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
772	return rc;
773	}
774
775
776	/**
777	* Terminates the GMM component.
778	*/
779	GMMR0DECL(void) GMMR0Term(void)
780	{
781	LogFlow(("GMMTerm:\n"));
782
783	/*
784	* Take care / be paranoid...
785	*/
786	PGMM pGMM = g_pGMM;
787	if (!VALID_PTR(pGMM))
788	return;
789	if (pGMM->u32Magic != GMM_MAGIC)
790	{
791	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
792	return;
793	}
794
795	/*
796	* Undo what init did and free all the resources we've acquired.
797	*/
798	/* Destroy the fundamentals. */
799	g_pGMM = NULL;
800	pGMM->u32Magic = ~GMM_MAGIC;
801	RTSemFastMutexDestroy(pGMM->hMtx);
802	pGMM->hMtx = NIL_RTSEMFASTMUTEX;
803
804	/* Free any chunks still hanging around. */
805	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
806
807	/* Destroy the chunk locks. */
808	for (unsigned iMtx = 0; iMtx++ < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
809	{
810	Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
811	RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
812	pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
813	}
814
815	/* Finally the instance data itself. */
816	RTMemFree(pGMM);
817	LogFlow(("GMMTerm: done\n"));
818	}
819
820
821	/**
822	* RTAvlU32Destroy callback.
823	*
824	* @returns 0
825	* @param pNode The node to destroy.
826	* @param pvGMM The GMM handle.
827	*/
828	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
829	{
830	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
831
832	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
833	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
834	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
835
836	int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
837	if (RT_FAILURE(rc))
838	{
839	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
840	pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
841	AssertRC(rc);
842	}
843	pChunk->hMemObj = NIL_RTR0MEMOBJ;
844
845	RTMemFree(pChunk->paMappingsX);
846	pChunk->paMappingsX = NULL;
847
848	RTMemFree(pChunk);
849	NOREF(pvGMM);
850	return 0;
851	}
852
853
854	/**
855	* Initializes the per-VM data for the GMM.
856	*
857	* This is called from within the GVMM lock (from GVMMR0CreateVM)
858	* and should only initialize the data members so GMMR0CleanupVM
859	* can deal with them. We reserve no memory or anything here,
860	* that's done later in GMMR0InitVM.
861	*
862	* @param pGVM Pointer to the Global VM structure.
863	*/
864	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
865	{
866	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
867
868	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
869	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
870	pGVM->gmm.s.fMayAllocate = false;
871	}
872
873
874	/**
875	* Acquires the GMM giant lock.
876	*
877	* @returns Assert status code from RTSemFastMutexRequest.
878	* @param pGMM Pointer to the GMM instance.
879	*/
880	static int gmmR0MutexAcquire(PGMM pGMM)
881	{
882	ASMAtomicIncU32(&pGMM->cMtxContenders);
883	int rc = RTSemFastMutexRequest(pGMM->hMtx);
884	ASMAtomicDecU32(&pGMM->cMtxContenders);
885	AssertRC(rc);
886	#ifdef VBOX_STRICT
887	pGMM->hMtxOwner = RTThreadNativeSelf();
888	#endif
889	return rc;
890	}
891
892
893	/**
894	* Releases the GMM giant lock.
895	*
896	* @returns Assert status code from RTSemFastMutexRequest.
897	* @param pGMM Pointer to the GMM instance.
898	*/
899	static int gmmR0MutexRelease(PGMM pGMM)
900	{
901	#ifdef VBOX_STRICT
902	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
903	#endif
904	int rc = RTSemFastMutexRelease(pGMM->hMtx);
905	AssertRC(rc);
906	return rc;
907	}
908
909
910	/**
911	* Yields the GMM giant lock if there is contention and a certain minimum time
912	* has elapsed since we took it.
913	*
914	* @returns @c true if the mutex was yielded, @c false if not.
915	* @param pGMM Pointer to the GMM instance.
916	* @param puLockNanoTS Where the lock acquisition time stamp is kept
917	* (in/out).
918	*/
919	static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
920	{
921	/*
922	* If nobody is contending the mutex, don't bother checking the time.
923	*/
924	if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
925	return false;
926
927	/*
928	* Don't yield if we haven't executed for at least 2 milliseconds.
929	*/
930	uint64_t uNanoNow = RTTimeSystemNanoTS();
931	if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
932	return false;
933
934	/*
935	* Yield the mutex.
936	*/
937	#ifdef VBOX_STRICT
938	pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
939	#endif
940	ASMAtomicIncU32(&pGMM->cMtxContenders);
941	int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
942
943	RTThreadYield();
944
945	int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
946	*puLockNanoTS = RTTimeSystemNanoTS();
947	ASMAtomicDecU32(&pGMM->cMtxContenders);
948	#ifdef VBOX_STRICT
949	pGMM->hMtxOwner = RTThreadNativeSelf();
950	#endif
951
952	return true;
953	}
954
955
956	/**
957	* Acquires a chunk lock.
958	*
959	* The caller must own the giant lock.
960	*
961	* @returns Assert status code from RTSemFastMutexRequest.
962	* @param pMtxState The chunk mutex state info. (Avoids
963	* passing the same flags and stuff around
964	* for subsequent release and drop-giant
965	* calls.)
966	* @param pGMM Pointer to the GMM instance.
967	* @param pChunk Pointer to the chunk.
968	* @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
969	*/
970	static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
971	{
972	Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
973	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
974
975	pMtxState->pGMM = pGMM;
976	pMtxState->fFlags = (uint8_t)fFlags;
977
978	/*
979	* Get the lock index and reference the lock.
980	*/
981	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
982	uint32_t iChunkMtx = pChunk->iChunkMtx;
983	if (iChunkMtx == UINT8_MAX)
984	{
985	iChunkMtx = pGMM->iNextChunkMtx++;
986	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
987
988	/* Try get an unused one... */
989	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
990	{
991	iChunkMtx = pGMM->iNextChunkMtx++;
992	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
993	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
994	{
995	iChunkMtx = pGMM->iNextChunkMtx++;
996	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
997	if (pGMM->aChunkMtx[iChunkMtx].cUsers)
998	{
999	iChunkMtx = pGMM->iNextChunkMtx++;
1000	iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
1001	}
1002	}
1003	}
1004
1005	pChunk->iChunkMtx = iChunkMtx;
1006	}
1007	AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
1008	pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
1009	ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
1010
1011	/*
1012	* Drop the giant?
1013	*/
1014	if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
1015	{
1016	/** @todo GMM life cycle cleanup (we may race someone
1017	* destroying and cleaning up GMM)? */
1018	gmmR0MutexRelease(pGMM);
1019	}
1020
1021	/*
1022	* Take the chunk mutex.
1023	*/
1024	int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
1025	AssertRC(rc);
1026	return rc;
1027	}
1028
1029
1030	/**
1031	* Releases the GMM giant lock.
1032	*
1033	* @returns Assert status code from RTSemFastMutexRequest.
1034	* @param pGMM Pointer to the GMM instance.
1035	* @param pChunk Pointer to the chunk if it's still
1036	* alive, NULL if it isn't. This is used to deassociate
1037	* the chunk from the mutex on the way out so a new one
1038	* can be selected next time, thus avoiding contented
1039	* mutexes.
1040	*/
1041	static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
1042	{
1043	PGMM pGMM = pMtxState->pGMM;
1044
1045	/*
1046	* Release the chunk mutex and reacquire the giant if requested.
1047	*/
1048	int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
1049	AssertRC(rc);
1050	if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
1051	rc = gmmR0MutexAcquire(pGMM);
1052	else
1053	Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
1054
1055	/*
1056	* Drop the chunk mutex user reference and deassociate it from the chunk
1057	* when possible.
1058	*/
1059	if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
1060	&& pChunk
1061	&& RT_SUCCESS(rc) )
1062	{
1063	if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
1064	pChunk->iChunkMtx = UINT8_MAX;
1065	else
1066	{
1067	rc = gmmR0MutexAcquire(pGMM);
1068	if (RT_SUCCESS(rc))
1069	{
1070	if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
1071	pChunk->iChunkMtx = UINT8_MAX;
1072	rc = gmmR0MutexRelease(pGMM);
1073	}
1074	}
1075	}
1076
1077	pMtxState->pGMM = NULL;
1078	return rc;
1079	}
1080
1081
1082	/**
1083	* Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
1084	* chunk locked.
1085	*
1086	* This only works if gmmR0ChunkMutexAcquire was called with
1087	* GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
1088	* mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
1089	*
1090	* @returns VBox status code (assuming success is ok).
1091	* @param pMtxState Pointer to the chunk mutex state.
1092	*/
1093	static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
1094	{
1095	AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_INTERNAL_ERROR_2);
1096	Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
1097	pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
1098	/** @todo GMM life cycle cleanup (we may race someone
1099	* destroying and cleaning up GMM)? */
1100	return gmmR0MutexRelease(pMtxState->pGMM);
1101	}
1102
1103
1104	/**
1105	* For experimenting with NUMA affinity and such.
1106	*
1107	* @returns The current NUMA Node ID.
1108	*/
1109	static uint16_t gmmR0GetCurrentNumaNodeId(void)
1110	{
1111	#if 1
1112	return GMM_CHUNK_NUMA_ID_UNKNOWN;
1113	#else
1114	return RTMpCpuId() / 16;
1115	#endif
1116	}
1117
1118
1119
1120	/**
1121	* Cleans up when a VM is terminating.
1122	*
1123	* @param pGVM Pointer to the Global VM structure.
1124	*/
1125	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
1126	{
1127	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
1128
1129	PGMM pGMM;
1130	GMM_GET_VALID_INSTANCE_VOID(pGMM);
1131
1132	#ifdef VBOX_WITH_PAGE_SHARING
1133	/*
1134	* Clean up all registered shared modules first.
1135	*/
1136	gmmR0SharedModuleCleanup(pGMM, pGVM);
1137	#endif
1138
1139	gmmR0MutexAcquire(pGMM);
1140	uint64_t uLockNanoTS = RTTimeSystemNanoTS();
1141	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
1142
1143	/*
1144	* The policy is 'INVALID' until the initial reservation
1145	* request has been serviced.
1146	*/
1147	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
1148	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
1149	{
1150	/*
1151	* If it's the last VM around, we can skip walking all the chunk looking
1152	* for the pages owned by this VM and instead flush the whole shebang.
1153	*
1154	* This takes care of the eventuality that a VM has left shared page
1155	* references behind (shouldn't happen of course, but you never know).
1156	*/
1157	Assert(pGMM->cRegisteredVMs);
1158	pGMM->cRegisteredVMs--;
1159
1160	/*
1161	* Walk the entire pool looking for pages that belong to this VM
1162	* and leftover mappings. (This'll only catch private pages,
1163	* shared pages will be 'left behind'.)
1164	*/
1165	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
1166
1167	unsigned iCountDown = 64;
1168	bool fRedoFromStart;
1169	PGMMCHUNK pChunk;
1170	do
1171	{
1172	fRedoFromStart = false;
1173	RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
1174	{
1175	uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
1176	if (gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
1177	{
1178	/* We left the giant mutex, so reset the yield counters. */
1179	uLockNanoTS = RTTimeSystemNanoTS();
1180	iCountDown = 64;
1181	}
1182	else
1183	{
1184	/* Didn't leave it, so do normal yielding. */
1185	if (!iCountDown)
1186	gmmR0MutexYield(pGMM, &uLockNanoTS);
1187	else
1188	iCountDown--;
1189	}
1190	if (pGMM->cFreedChunks != cFreeChunksOld)
1191	break;
1192	}
1193	} while (fRedoFromStart);
1194
1195	if (pGVM->gmm.s.cPrivatePages)
1196	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
1197
1198	pGMM->cAllocatedPages -= cPrivatePages;
1199
1200	/*
1201	* Free empty chunks.
1202	*/
1203	PGMMCHUNKFREESET pPrivateSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
1204	do
1205	{
1206	fRedoFromStart = false;
1207	iCountDown = 10240;
1208	pChunk = pPrivateSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
1209	while (pChunk)
1210	{
1211	PGMMCHUNK pNext = pChunk->pFreeNext;
1212	Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
1213	if ( !pGMM->fBoundMemoryMode
1214	\|\| pChunk->hGVM == pGVM->hSelf)
1215	{
1216	uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1217	if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /fRelaxedSem/))
1218	{
1219	/* We've left the giant mutex, restart? (+1 for our unlink) */
1220	fRedoFromStart = pPrivateSet->idGeneration != idGenerationOld + 1;
1221	if (fRedoFromStart)
1222	break;
1223	uLockNanoTS = RTTimeSystemNanoTS();
1224	iCountDown = 10240;
1225	}
1226	}
1227
1228	/* Advance and maybe yield the lock. */
1229	pChunk = pNext;
1230	if (--iCountDown == 0)
1231	{
1232	uint64_t const idGenerationOld = pPrivateSet->idGeneration;
1233	fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
1234	&& pPrivateSet->idGeneration != idGenerationOld;
1235	if (fRedoFromStart)
1236	break;
1237	iCountDown = 10240;
1238	}
1239	}
1240	} while (fRedoFromStart);
1241
1242	/*
1243	* Account for shared pages that weren't freed.
1244	*/
1245	if (pGVM->gmm.s.cSharedPages)
1246	{
1247	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
1248	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
1249	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
1250	}
1251
1252	/*
1253	* Clean up balloon statistics in case the VM process crashed.
1254	*/
1255	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
1256	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
1257
1258	/*
1259	* Update the over-commitment management statistics.
1260	*/
1261	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1262	+ pGVM->gmm.s.Reserved.cFixedPages
1263	+ pGVM->gmm.s.Reserved.cShadowPages;
1264	switch (pGVM->gmm.s.enmPolicy)
1265	{
1266	case GMMOCPOLICY_NO_OC:
1267	break;
1268	default:
1269	/** @todo Update GMM->cOverCommittedPages */
1270	break;
1271	}
1272	}
1273
1274	/* zap the GVM data. */
1275	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
1276	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
1277	pGVM->gmm.s.fMayAllocate = false;
1278
1279	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1280	gmmR0MutexRelease(pGMM);
1281
1282	LogFlow(("GMMR0CleanupVM: returns\n"));
1283	}
1284
1285
1286	/**
1287	* Scan one chunk for private pages belonging to the specified VM.
1288	*
1289	* @note This function may drop the gian mutex!
1290	*
1291	* @returns @c true if we've temporarily dropped the giant mutex, @c false if
1292	* we didn't.
1293	* @param pGMM Pointer to the GMM instance.
1294	* @param pGVM The global VM handle.
1295	* @param pChunk The chunk to scan.
1296	*/
1297	static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1298	{
1299	/*
1300	* Look for pages belonging to the VM.
1301	* (Perform some internal checks while we're scanning.)
1302	*/
1303	#ifndef VBOX_STRICT
1304	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
1305	#endif
1306	{
1307	unsigned cPrivate = 0;
1308	unsigned cShared = 0;
1309	unsigned cFree = 0;
1310
1311	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
1312
1313	uint16_t hGVM = pGVM->hSelf;
1314	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
1315	while (iPage-- > 0)
1316	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
1317	{
1318	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
1319	{
1320	/*
1321	* Free the page.
1322	*
1323	* The reason for not using gmmR0FreePrivatePage here is that we
1324	* must not cause the chunk to be freed from under us - we're in
1325	* an AVL tree walk here.
1326	*/
1327	pChunk->aPages[iPage].u = 0;
1328	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
1329	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1330	pChunk->iFreeHead = iPage;
1331	pChunk->cPrivate--;
1332	pChunk->cFree++;
1333	pGVM->gmm.s.cPrivatePages--;
1334	cFree++;
1335	}
1336	else
1337	cPrivate++;
1338	}
1339	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
1340	cFree++;
1341	else
1342	cShared++;
1343
1344	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1345
1346	/*
1347	* Did it add up?
1348	*/
1349	if (RT_UNLIKELY( pChunk->cFree != cFree
1350	\|\| pChunk->cPrivate != cPrivate
1351	\|\| pChunk->cShared != cShared))
1352	{
1353	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
1354	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
1355	pChunk->cFree = cFree;
1356	pChunk->cPrivate = cPrivate;
1357	pChunk->cShared = cShared;
1358	}
1359	}
1360
1361	/*
1362	* If not in bound memory mode, we should reset the hGVM field
1363	* if it has our handle in it.
1364	*/
1365	if (pChunk->hGVM == pGVM->hSelf)
1366	{
1367	if (!g_pGMM->fBoundMemoryMode)
1368	pChunk->hGVM = NIL_GVM_HANDLE;
1369	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1370	{
1371	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1372	pChunk, pChunk->Core.Key, pChunk->cFree);
1373	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1374
1375	gmmR0UnlinkChunk(pChunk);
1376	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1377	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
1378	}
1379	}
1380
1381	/*
1382	* Look for a mapping belonging to the terminating VM.
1383	*/
1384	GMMR0CHUNKMTXSTATE MtxState;
1385	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
1386	unsigned cMappings = pChunk->cMappingsX;
1387	for (unsigned i = 0; i < cMappings; i++)
1388	if (pChunk->paMappingsX[i].pGVM == pGVM)
1389	{
1390	gmmR0ChunkMutexDropGiant(&MtxState);
1391
1392	RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
1393
1394	cMappings--;
1395	if (i < cMappings)
1396	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
1397	pChunk->paMappingsX[cMappings].pGVM = NULL;
1398	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
1399	Assert(pChunk->cMappingsX - 1U == cMappings);
1400	pChunk->cMappingsX = cMappings;
1401
1402	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
1403	if (RT_FAILURE(rc))
1404	{
1405	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1406	pChunk, pChunk->Core.Key, i, hMemObj, rc);
1407	AssertRC(rc);
1408	}
1409
1410	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1411	return true;
1412	}
1413
1414	gmmR0ChunkMutexRelease(&MtxState, pChunk);
1415	return false;
1416	}
1417
1418
1419	/**
1420	* The initial resource reservations.
1421	*
1422	* This will make memory reservations according to policy and priority. If there aren't
1423	* sufficient resources available to sustain the VM this function will fail and all
1424	* future allocations requests will fail as well.
1425	*
1426	* These are just the initial reservations made very very early during the VM creation
1427	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1428	* ring-3 init has completed.
1429	*
1430	* @returns VBox status code.
1431	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1432	* @retval VERR_GMM_
1433	*
1434	* @param pVM Pointer to the shared VM structure.
1435	* @param idCpu VCPU id
1436	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1437	* This does not include MMIO2 and similar.
1438	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1439	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1440	* hyper heap, MMIO2 and similar.
1441	* @param enmPolicy The OC policy to use on this VM.
1442	* @param enmPriority The priority in an out-of-memory situation.
1443	*
1444	* @thread The creator thread / EMT.
1445	*/
1446	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1447	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1448	{
1449	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1450	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1451
1452	/*
1453	* Validate, get basics and take the semaphore.
1454	*/
1455	PGMM pGMM;
1456	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1457	PGVM pGVM;
1458	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1459	if (RT_FAILURE(rc))
1460	return rc;
1461
1462	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1463	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1464	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1465	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1466	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1467
1468	gmmR0MutexAcquire(pGMM);
1469	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1470	{
1471	if ( !pGVM->gmm.s.Reserved.cBasePages
1472	&& !pGVM->gmm.s.Reserved.cFixedPages
1473	&& !pGVM->gmm.s.Reserved.cShadowPages)
1474	{
1475	/*
1476	* Check if we can accommodate this.
1477	*/
1478	/* ... later ... */
1479	if (RT_SUCCESS(rc))
1480	{
1481	/*
1482	* Update the records.
1483	*/
1484	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1485	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1486	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1487	pGVM->gmm.s.enmPolicy = enmPolicy;
1488	pGVM->gmm.s.enmPriority = enmPriority;
1489	pGVM->gmm.s.fMayAllocate = true;
1490
1491	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1492	pGMM->cRegisteredVMs++;
1493	}
1494	}
1495	else
1496	rc = VERR_WRONG_ORDER;
1497	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1498	}
1499	else
1500	rc = VERR_INTERNAL_ERROR_5;
1501	gmmR0MutexRelease(pGMM);
1502	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1503	return rc;
1504	}
1505
1506
1507	/**
1508	* VMMR0 request wrapper for GMMR0InitialReservation.
1509	*
1510	* @returns see GMMR0InitialReservation.
1511	* @param pVM Pointer to the shared VM structure.
1512	* @param idCpu VCPU id
1513	* @param pReq The request packet.
1514	*/
1515	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1516	{
1517	/*
1518	* Validate input and pass it on.
1519	*/
1520	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1521	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1522	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1523
1524	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1525	}
1526
1527
1528	/**
1529	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1530	*
1531	* @returns VBox status code.
1532	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1533	*
1534	* @param pVM Pointer to the shared VM structure.
1535	* @param idCpu VCPU id
1536	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1537	* This does not include MMIO2 and similar.
1538	* @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1539	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1540	* hyper heap, MMIO2 and similar.
1541	*
1542	* @thread EMT.
1543	*/
1544	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1545	{
1546	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1547	pVM, cBasePages, cShadowPages, cFixedPages));
1548
1549	/*
1550	* Validate, get basics and take the semaphore.
1551	*/
1552	PGMM pGMM;
1553	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1554	PGVM pGVM;
1555	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1556	if (RT_FAILURE(rc))
1557	return rc;
1558
1559	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1560	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1561	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1562
1563	gmmR0MutexAcquire(pGMM);
1564	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1565	{
1566	if ( pGVM->gmm.s.Reserved.cBasePages
1567	&& pGVM->gmm.s.Reserved.cFixedPages
1568	&& pGVM->gmm.s.Reserved.cShadowPages)
1569	{
1570	/*
1571	* Check if we can accommodate this.
1572	*/
1573	/* ... later ... */
1574	if (RT_SUCCESS(rc))
1575	{
1576	/*
1577	* Update the records.
1578	*/
1579	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1580	+ pGVM->gmm.s.Reserved.cFixedPages
1581	+ pGVM->gmm.s.Reserved.cShadowPages;
1582	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1583
1584	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1585	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1586	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1587	}
1588	}
1589	else
1590	rc = VERR_WRONG_ORDER;
1591	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1592	}
1593	else
1594	rc = VERR_INTERNAL_ERROR_5;
1595	gmmR0MutexRelease(pGMM);
1596	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1597	return rc;
1598	}
1599
1600
1601	/**
1602	* VMMR0 request wrapper for GMMR0UpdateReservation.
1603	*
1604	* @returns see GMMR0UpdateReservation.
1605	* @param pVM Pointer to the shared VM structure.
1606	* @param idCpu VCPU id
1607	* @param pReq The request packet.
1608	*/
1609	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1610	{
1611	/*
1612	* Validate input and pass it on.
1613	*/
1614	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1615	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1616	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1617
1618	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1619	}
1620
1621
1622	/**
1623	* Performs sanity checks on a free set.
1624	*
1625	* @returns Error count.
1626	*
1627	* @param pGMM Pointer to the GMM instance.
1628	* @param pSet Pointer to the set.
1629	* @param pszSetName The set name.
1630	* @param pszFunction The function from which it was called.
1631	* @param uLine The line number.
1632	*/
1633	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1634	const char *pszFunction, unsigned uLineNo)
1635	{
1636	uint32_t cErrors = 0;
1637
1638	/*
1639	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1640	*/
1641	uint32_t cPages = 0;
1642	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1643	{
1644	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1645	{
1646	/** @todo check that the chunk is hash into the right set. */
1647	cPages += pCur->cFree;
1648	}
1649	}
1650	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1651	{
1652	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1653	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1654	cErrors++;
1655	}
1656
1657	return cErrors;
1658	}
1659
1660
1661	/**
1662	* Performs some sanity checks on the GMM while owning lock.
1663	*
1664	* @returns Error count.
1665	*
1666	* @param pGMM Pointer to the GMM instance.
1667	* @param pszFunction The function from which it is called.
1668	* @param uLineNo The line number.
1669	*/
1670	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1671	{
1672	uint32_t cErrors = 0;
1673
1674	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->PrivateX, "private", pszFunction, uLineNo);
1675	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1676	/** @todo add more sanity checks. */
1677
1678	return cErrors;
1679	}
1680
1681
1682	/**
1683	* Looks up a chunk in the tree and fill in the TLB entry for it.
1684	*
1685	* This is not expected to fail and will bitch if it does.
1686	*
1687	* @returns Pointer to the allocation chunk, NULL if not found.
1688	* @param pGMM Pointer to the GMM instance.
1689	* @param idChunk The ID of the chunk to find.
1690	* @param pTlbe Pointer to the TLB entry.
1691	*/
1692	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1693	{
1694	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1695	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1696	pTlbe->idChunk = idChunk;
1697	pTlbe->pChunk = pChunk;
1698	return pChunk;
1699	}
1700
1701
1702	/**
1703	* Finds a allocation chunk.
1704	*
1705	* This is not expected to fail and will bitch if it does.
1706	*
1707	* @returns Pointer to the allocation chunk, NULL if not found.
1708	* @param pGMM Pointer to the GMM instance.
1709	* @param idChunk The ID of the chunk to find.
1710	*/
1711	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1712	{
1713	/*
1714	* Do a TLB lookup, branch if not in the TLB.
1715	*/
1716	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1717	if ( pTlbe->idChunk != idChunk
1718	\|\| !pTlbe->pChunk)
1719	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1720	return pTlbe->pChunk;
1721	}
1722
1723
1724	/**
1725	* Finds a page.
1726	*
1727	* This is not expected to fail and will bitch if it does.
1728	*
1729	* @returns Pointer to the page, NULL if not found.
1730	* @param pGMM Pointer to the GMM instance.
1731	* @param idPage The ID of the page to find.
1732	*/
1733	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1734	{
1735	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1736	if (RT_LIKELY(pChunk))
1737	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1738	return NULL;
1739	}
1740
1741
1742	/**
1743	* Gets the host physical address for a page given by it's ID.
1744	*
1745	* @returns The host physical address or NIL_RTHCPHYS.
1746	* @param pGMM Pointer to the GMM instance.
1747	* @param idPage The ID of the page to find.
1748	*/
1749	DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
1750	{
1751	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1752	if (RT_LIKELY(pChunk))
1753	return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
1754	return NIL_RTHCPHYS;
1755	}
1756
1757
1758	/**
1759	* Selects the appropriate free list given the number of free pages.
1760	*
1761	* @returns Free list index.
1762	* @param cFree The number of free pages in the chunk.
1763	*/
1764	DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
1765	{
1766	unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
1767	AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
1768	("%d (%u)\n", iList, cFree));
1769	return iList;
1770	}
1771
1772
1773	/**
1774	* Unlinks the chunk from the free list it's currently on (if any).
1775	*
1776	* @param pChunk The allocation chunk.
1777	*/
1778	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1779	{
1780	PGMMCHUNKFREESET pSet = pChunk->pSet;
1781	if (RT_LIKELY(pSet))
1782	{
1783	pSet->cFreePages -= pChunk->cFree;
1784	pSet->idGeneration++;
1785
1786	PGMMCHUNK pPrev = pChunk->pFreePrev;
1787	PGMMCHUNK pNext = pChunk->pFreeNext;
1788	if (pPrev)
1789	pPrev->pFreeNext = pNext;
1790	else
1791	pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
1792	if (pNext)
1793	pNext->pFreePrev = pPrev;
1794
1795	pChunk->pSet = NULL;
1796	pChunk->pFreeNext = NULL;
1797	pChunk->pFreePrev = NULL;
1798	}
1799	else
1800	{
1801	Assert(!pChunk->pFreeNext);
1802	Assert(!pChunk->pFreePrev);
1803	Assert(!pChunk->cFree);
1804	}
1805	}
1806
1807
1808	/**
1809	* Links the chunk onto the appropriate free list in the specified free set.
1810	*
1811	* If no free entries, it's not linked into any list.
1812	*
1813	* @param pChunk The allocation chunk.
1814	* @param pSet The free set.
1815	*/
1816	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1817	{
1818	Assert(!pChunk->pSet);
1819	Assert(!pChunk->pFreeNext);
1820	Assert(!pChunk->pFreePrev);
1821
1822	if (pChunk->cFree > 0)
1823	{
1824	pChunk->pSet = pSet;
1825	pChunk->pFreePrev = NULL;
1826	unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
1827	pChunk->pFreeNext = pSet->apLists[iList];
1828	if (pChunk->pFreeNext)
1829	pChunk->pFreeNext->pFreePrev = pChunk;
1830	pSet->apLists[iList] = pChunk;
1831
1832	pSet->cFreePages += pChunk->cFree;
1833	pSet->idGeneration++;
1834	}
1835	}
1836
1837
1838	/**
1839	* Links the chunk onto the appropriate free list in the specified free set.
1840	*
1841	* If no free entries, it's not linked into any list.
1842	*
1843	* @param pChunk The allocation chunk.
1844	*/
1845	DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
1846	{
1847	PGMMCHUNKFREESET pSet;
1848	if (pGMM->fBoundMemoryMode)
1849	pSet = &pGVM->gmm.s.Private;
1850	else if (pChunk->cShared)
1851	pSet = &pGMM->Shared;
1852	else
1853	pSet = &pGMM->PrivateX;
1854	gmmR0LinkChunk(pChunk, pSet);
1855	}
1856
1857
1858	/**
1859	* Frees a Chunk ID.
1860	*
1861	* @param pGMM Pointer to the GMM instance.
1862	* @param idChunk The Chunk ID to free.
1863	*/
1864	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1865	{
1866	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1867	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1868	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1869	}
1870
1871
1872	/**
1873	* Allocates a new Chunk ID.
1874	*
1875	* @returns The Chunk ID.
1876	* @param pGMM Pointer to the GMM instance.
1877	*/
1878	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1879	{
1880	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1881	AssertCompile(NIL_GMM_CHUNKID == 0);
1882
1883	/*
1884	* Try the next sequential one.
1885	*/
1886	int32_t idChunk = ++pGMM->idChunkPrev;
1887	#if 0 /** @todo enable this code */
1888	if ( idChunk <= GMM_CHUNKID_LAST
1889	&& idChunk > NIL_GMM_CHUNKID
1890	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1891	return idChunk;
1892	#endif
1893
1894	/*
1895	* Scan sequentially from the last one.
1896	*/
1897	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1898	&& idChunk > NIL_GMM_CHUNKID)
1899	{
1900	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1901	if (idChunk > NIL_GMM_CHUNKID)
1902	{
1903	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1904	return pGMM->idChunkPrev = idChunk;
1905	}
1906	}
1907
1908	/*
1909	* Ok, scan from the start.
1910	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1911	*/
1912	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1913	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1914	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1915
1916	return pGMM->idChunkPrev = idChunk;
1917	}
1918
1919
1920	/**
1921	* Allocates one private page.
1922	*
1923	* Worker for gmmR0AllocatePages.
1924	*
1925	* @param pGMM Pointer to the GMM instance data.
1926	* @param hGVM The GVM handle of the VM requesting memory.
1927	* @param pChunk The chunk to allocate it from.
1928	* @param pPageDesc The page descriptor.
1929	*/
1930	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1931	{
1932	/* update the chunk stats. */
1933	if (pChunk->hGVM == NIL_GVM_HANDLE)
1934	pChunk->hGVM = hGVM;
1935	Assert(pChunk->cFree);
1936	pChunk->cFree--;
1937	pChunk->cPrivate++;
1938
1939	/* unlink the first free page. */
1940	const uint32_t iPage = pChunk->iFreeHead;
1941	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1942	PGMMPAGE pPage = &pChunk->aPages[iPage];
1943	Assert(GMM_PAGE_IS_FREE(pPage));
1944	pChunk->iFreeHead = pPage->Free.iNext;
1945	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1946	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
1947	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1948
1949	/* make the page private. */
1950	pPage->u = 0;
1951	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1952	pPage->Private.hGVM = hGVM;
1953	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1954	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1955	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1956	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1957	else
1958	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1959
1960	/* update the page descriptor. */
1961	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
1962	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1963	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
1964	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1965	}
1966
1967
1968	/**
1969	* Picks the free pages from a chunk.
1970	*
1971	* @returns The new page descriptor table index.
1972	* @param pGMM Pointer to the GMM instance data.
1973	* @param hGVM The VM handle.
1974	* @param pChunk The chunk.
1975	* @param iPage The current page descriptor table index.
1976	* @param cPages The total number of pages to allocate.
1977	* @param paPages The page descriptor table (input + ouput).
1978	*/
1979	static uint32_t gmmR0AllocatePagesFromChunk(PGMM pGMM, uint16_t const hGVM, PGMMCHUNK pChunk, uint32_t iPage, uint32_t cPages,
1980	PGMMPAGEDESC paPages)
1981	{
1982	PGMMCHUNKFREESET pSet = pChunk->pSet; Assert(pSet);
1983	gmmR0UnlinkChunk(pChunk);
1984
1985	for (; pChunk->cFree && iPage < cPages; iPage++)
1986	gmmR0AllocatePage(pGMM, hGVM, pChunk, &paPages[iPage]);
1987
1988	gmmR0LinkChunk(pChunk, pSet);
1989	return iPage;
1990	}
1991
1992
1993	/**
1994	* Registers a new chunk of memory.
1995	*
1996	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk.
1997	*
1998	* @returns VBox status code. On success, the giant GMM lock will be held, the
1999	* caller must release it (ugly).
2000	* @param pGMM Pointer to the GMM instance.
2001	* @param pSet Pointer to the set.
2002	* @param MemObj The memory object for the chunk.
2003	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
2004	* affinity.
2005	* @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
2006	* @param ppChunk Chunk address (out). Optional.
2007	*
2008	* @remarks The caller must not own the giant GMM mutex.
2009	* The giant GMM mutex will be acquired and returned acquired in
2010	* the success path. On failure, no locks will be held.
2011	*/
2012	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, uint16_t fChunkFlags,
2013	PGMMCHUNK *ppChunk)
2014	{
2015	Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
2016	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
2017	Assert(fChunkFlags == 0 \|\| fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
2018
2019	int rc;
2020	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
2021	if (pChunk)
2022	{
2023	/*
2024	* Initialize it.
2025	*/
2026	pChunk->hMemObj = MemObj;
2027	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
2028	pChunk->hGVM = hGVM;
2029	/pChunk->iFreeHead = 0;/
2030	pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
2031	pChunk->iChunkMtx = UINT8_MAX;
2032	pChunk->fFlags = fChunkFlags;
2033	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
2034	{
2035	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
2036	pChunk->aPages[iPage].Free.iNext = iPage + 1;
2037	}
2038	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
2039	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
2040
2041	/*
2042	* Allocate a Chunk ID and insert it into the tree.
2043	* This has to be done behind the mutex of course.
2044	*/
2045	rc = gmmR0MutexAcquire(pGMM);
2046	if (RT_SUCCESS(rc))
2047	{
2048	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2049	{
2050	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
2051	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
2052	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
2053	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
2054	{
2055	pGMM->cChunks++;
2056	RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
2057	gmmR0LinkChunk(pChunk, pSet);
2058	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
2059
2060	if (ppChunk)
2061	*ppChunk = pChunk;
2062	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2063	return VINF_SUCCESS;
2064	}
2065
2066	/* bail out */
2067	rc = VERR_INTERNAL_ERROR;
2068	}
2069	else
2070	rc = VERR_INTERNAL_ERROR_5;
2071	gmmR0MutexRelease(pGMM);
2072	}
2073
2074	RTMemFree(pChunk);
2075	}
2076	else
2077	rc = VERR_NO_MEMORY;
2078	return rc;
2079	}
2080
2081
2082	/**
2083	* Allocate a new chunk, immediately pick the requested pages from it, and adds
2084	* what's remaining to the specified free set.
2085	*
2086	* @note This will leave the giant mutex while allocating the new chunk!
2087	*
2088	* @returns VBox status code.
2089	* @param pGMM Pointer to the GMM instance data.
2090	* @param pGVM Pointer to the kernel-only VM instace data.
2091	* @param pSet Pointer to the free set.
2092	* @param cPages The number of pages requested.
2093	* @param paPages The page descriptor table (input + output).
2094	* @param piPage The pointer to the page descriptor table index
2095	* variable. This will be updated.
2096	*/
2097	static int gmmR0AllocateChunkNew(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
2098	PGMMPAGEDESC paPages, uint32_t *piPage)
2099	{
2100	gmmR0MutexRelease(pGMM);
2101
2102	RTR0MEMOBJ hMemObj;
2103	int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
2104	if (RT_SUCCESS(rc))
2105	{
2106	/** @todo Duplicate gmmR0RegisterChunk here so we can avoid chaining up the
2107	* free pages first and then unchaining them right afterwards. Instead
2108	* do as much work as possible without holding the giant lock. */
2109	PGMMCHUNK pChunk;
2110	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, 0 /fChunkFlags/, &pChunk);
2111	if (RT_SUCCESS(rc))
2112	{
2113	piPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, piPage, cPages, paPages);
2114	return VINF_SUCCESS;
2115	}
2116
2117	/* bail out */
2118	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2119	}
2120
2121	int rc2 = gmmR0MutexAcquire(pGMM);
2122	AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
2123	return rc;
2124
2125	}
2126
2127
2128	/**
2129	* As a last restort we'll pick any page we can get.
2130	*
2131	* @returns The new page descriptor table index.
2132	* @param pGMM Pointer to the GMM instance data.
2133	* @param pGVM Pointer to the global VM structure.
2134	* @param pSet The set to pick from.
2135	* @param iPage The current page descriptor table index.
2136	* @param cPages The total number of pages to allocate.
2137	* @param paPages The page descriptor table (input + ouput).
2138	*/
2139	static uint32_t gmmR0AllocatePagesIndiscriminately(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2140	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2141	{
2142	unsigned iList = RT_ELEMENTS(pSet->apLists);
2143	while (iList-- > 0)
2144	{
2145	PGMMCHUNK pChunk = pSet->apLists[iList];
2146	while (pChunk)
2147	{
2148	PGMMCHUNK pNext = pChunk->pFreeNext;
2149
2150	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2151	if (iPage >= cPages)
2152	return iPage;
2153
2154	pChunk = pNext;
2155	}
2156	}
2157	return iPage;
2158	}
2159
2160
2161	/**
2162	* Pick pages from empty chunks on the same NUMA node.
2163	*
2164	* @returns The new page descriptor table index.
2165	* @param pGMM Pointer to the GMM instance data.
2166	* @param pGVM Pointer to the global VM structure.
2167	* @param pSet The set to pick from.
2168	* @param iPage The current page descriptor table index.
2169	* @param cPages The total number of pages to allocate.
2170	* @param paPages The page descriptor table (input + ouput).
2171	*/
2172	static uint32_t gmmR0AllocatePagesFromEmptyChunksOnSameNode(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2173	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2174	{
2175	PGMMCHUNK pChunk = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
2176	if (pChunk)
2177	{
2178	uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2179	while (pChunk)
2180	{
2181	PGMMCHUNK pNext = pChunk->pFreeNext;
2182
2183	if (pChunk->idNumaNode == idNumaNode)
2184	{
2185	pChunk->hGVM = pGVM->hSelf;
2186	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2187	if (iPage >= cPages)
2188	{
2189	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2190	return iPage;
2191	}
2192	}
2193
2194	pChunk = pNext;
2195	}
2196	}
2197	return iPage;
2198	}
2199
2200
2201	/**
2202	* Pick pages from non-empty chunks on the same NUMA node.
2203	*
2204	* @returns The new page descriptor table index.
2205	* @param pGMM Pointer to the GMM instance data.
2206	* @param pGVM Pointer to the global VM structure.
2207	* @param pSet The set to pick from.
2208	* @param iPage The current page descriptor table index.
2209	* @param cPages The total number of pages to allocate.
2210	* @param paPages The page descriptor table (input + ouput).
2211	*/
2212	static uint32_t gmmR0AllocatePagesFromSameNode(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2213	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2214	{
2215	/** @todo start by picking from chunks with about the right size first? */
2216	uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
2217	unsigned iList = GMM_CHUNK_FREE_SET_UNUSED_LIST;
2218	while (iList-- > 0)
2219	{
2220	PGMMCHUNK pChunk = pSet->apLists[iList];
2221	while (pChunk)
2222	{
2223	PGMMCHUNK pNext = pChunk->pFreeNext;
2224
2225	if (pChunk->idNumaNode == idNumaNode)
2226	{
2227	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2228	if (iPage >= cPages)
2229	{
2230	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2231	return iPage;
2232	}
2233	}
2234
2235	pChunk = pNext;
2236	}
2237	}
2238	return iPage;
2239	}
2240
2241
2242	/**
2243	* Pick pages that are in chunks already associated with the VM.
2244	*
2245	* @returns The new page descriptor table index.
2246	* @param pGMM Pointer to the GMM instance data.
2247	* @param pGVM Pointer to the global VM structure.
2248	* @param pSet The set to pick from.
2249	* @param iPage The current page descriptor table index.
2250	* @param cPages The total number of pages to allocate.
2251	* @param paPages The page descriptor table (input + ouput).
2252	*/
2253	static uint32_t gmmR0AllocatePagesAssociatedWithVM(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
2254	uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2255	{
2256	uint16_t const hGVM = pGVM->hSelf;
2257
2258	/* Hint. */
2259	if (pGVM->gmm.s.idLastChunkHint != NIL_GMM_CHUNKID)
2260	{
2261	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pGVM->gmm.s.idLastChunkHint);
2262	if (pChunk && pChunk->cFree)
2263	{
2264	iPage = gmmR0AllocatePagesFromChunk(pGMM, hGVM, pChunk, iPage, cPages, paPages);
2265	if (iPage >= cPages)
2266	return iPage;
2267	}
2268	}
2269
2270	/* Scan. */
2271	for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
2272	{
2273	PGMMCHUNK pChunk = pSet->apLists[iList];
2274	while (pChunk)
2275	{
2276	PGMMCHUNK pNext = pChunk->pFreeNext;
2277
2278	if (pChunk->hGVM == hGVM)
2279	{
2280	iPage = gmmR0AllocatePagesFromChunk(pGMM, hGVM, pChunk, iPage, cPages, paPages);
2281	if (iPage >= cPages)
2282	{
2283	pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
2284	return iPage;
2285	}
2286	}
2287
2288	pChunk = pNext;
2289	}
2290	}
2291	return iPage;
2292	}
2293
2294
2295
2296	/**
2297	* Pick pages in bound memory mode.
2298	*
2299	* @returns The new page descriptor table index.
2300	* @param pGMM Pointer to the GMM instance data.
2301	* @param pGVM Pointer to the global VM structure.
2302	* @param iPage The current page descriptor table index.
2303	* @param cPages The total number of pages to allocate.
2304	* @param paPages The page descriptor table (input + ouput).
2305	*/
2306	static uint32_t gmmR0AllocatePagesInBoundMode(PGMM pGMM, PGVM pGVM, uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
2307	{
2308	for (unsigned iList = 0; iList < RT_ELEMENTS(pGVM->gmm.s.Private.apLists); iList++)
2309	{
2310	PGMMCHUNK pChunk = pGVM->gmm.s.Private.apLists[iList];
2311	while (pChunk)
2312	{
2313	Assert(pChunk->hGVM == pGVM->hSelf);
2314	PGMMCHUNK pNext = pChunk->pFreeNext;
2315	iPage = gmmR0AllocatePagesFromChunk(pGMM, pGVM->hSelf, pChunk, iPage, cPages, paPages);
2316	if (iPage >= cPages)
2317	return iPage;
2318	pChunk = pNext;
2319	}
2320	}
2321	return iPage;
2322	}
2323
2324
2325	/**
2326	* Checks if we should start picking pages from chunks of other VMs.
2327	*
2328	* @returns @c true if we should, @c false if we should first try allocate more
2329	* chunks.
2330	*/
2331	static bool gmmR0ShouldAllocatePagesInOtherChunks(PGVM pGVM)
2332	{
2333	/*
2334	* Don't allocate a new chunk if we're
2335	*/
2336	uint64_t cPgReserved = pGVM->gmm.s.Reserved.cBasePages
2337	+ pGVM->gmm.s.Reserved.cFixedPages
2338	- pGVM->gmm.s.cBalloonedPages
2339	/** @todo what about shared pages? */;
2340	uint64_t cPgAllocated = pGVM->gmm.s.Allocated.cBasePages
2341	+ pGVM->gmm.s.Allocated.cFixedPages;
2342	uint64_t cPgDelta = cPgReserved - cPgAllocated;
2343	if (cPgDelta < GMM_CHUNK_NUM_PAGES * 4)
2344	return true;
2345	/** @todo make the threshold configurable, also test the code to see if
2346	* this ever kicks in (we might be reserving too much or smth). */
2347
2348	/*
2349	* Check how close we're to the max memory limit and how many fragments
2350	* there are?...
2351	*/
2352	/** @todo. */
2353
2354	return false;
2355	}
2356
2357
2358	/**
2359	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
2360	*
2361	* @returns VBox status code:
2362	* @retval VINF_SUCCESS on success.
2363	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
2364	* gmmR0AllocateMoreChunks is necessary.
2365	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2366	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2367	* that is we're trying to allocate more than we've reserved.
2368	*
2369	* @param pGMM Pointer to the GMM instance data.
2370	* @param pGVM Pointer to the shared VM structure.
2371	* @param cPages The number of pages to allocate.
2372	* @param paPages Pointer to the page descriptors.
2373	* See GMMPAGEDESC for details on what is expected on input.
2374	* @param enmAccount The account to charge.
2375	*
2376	* @remarks Call takes the giant GMM lock.
2377	*/
2378	static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2379	{
2380	Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
2381
2382	/*
2383	* Check allocation limits.
2384	*/
2385	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
2386	return VERR_GMM_HIT_GLOBAL_LIMIT;
2387
2388	switch (enmAccount)
2389	{
2390	case GMMACCOUNT_BASE:
2391	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2392	> pGVM->gmm.s.Reserved.cBasePages))
2393	{
2394	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
2395	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
2396	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2397	}
2398	break;
2399	case GMMACCOUNT_SHADOW:
2400	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
2401	{
2402	Log(("gmmR0AllocatePages:Shadow: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2403	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
2404	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2405	}
2406	break;
2407	case GMMACCOUNT_FIXED:
2408	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
2409	{
2410	Log(("gmmR0AllocatePages:Fixed: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
2411	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
2412	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2413	}
2414	break;
2415	default:
2416	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2417	}
2418
2419	/*
2420	* If we're in legacy memory mode, it's easy to figure if we have
2421	* sufficient number of pages up-front.
2422	*/
2423	if ( pGMM->fLegacyAllocationMode
2424	&& pGVM->gmm.s.Private.cFreePages < cPages)
2425	{
2426	Assert(pGMM->fBoundMemoryMode);
2427	return VERR_GMM_SEED_ME;
2428	}
2429
2430	/*
2431	* Update the accounts before we proceed because we might be leaving the
2432	* protection of the global mutex and thus run the risk of permitting
2433	* too much memory to be allocated.
2434	*/
2435	switch (enmAccount)
2436	{
2437	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += cPages; break;
2438	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += cPages; break;
2439	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += cPages; break;
2440	default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2441	}
2442	pGVM->gmm.s.cPrivatePages += cPages;
2443	pGMM->cAllocatedPages += cPages;
2444
2445	/*
2446	* Part two of it's-easy-in-legacy-memory-mode.
2447	*/
2448	uint32_t iPage = 0;
2449	if (pGMM->fLegacyAllocationMode)
2450	{
2451	iPage = gmmR0AllocatePagesInBoundMode(pGMM, pGVM, iPage, cPages, paPages);
2452	AssertReleaseReturn(iPage == cPages, VERR_INTERNAL_ERROR_3);
2453	return VINF_SUCCESS;
2454	}
2455
2456	/*
2457	* Bound mode is also relatively straightforward.
2458	*/
2459	int rc = VINF_SUCCESS;
2460	if (pGMM->fBoundMemoryMode)
2461	{
2462	iPage = gmmR0AllocatePagesInBoundMode(pGMM, pGVM, iPage, cPages, paPages);
2463	if (iPage < cPages)
2464	do
2465	rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGVM->gmm.s.Private, cPages, paPages, &iPage);
2466	while (iPage < cPages && RT_SUCCESS(rc));
2467	}
2468	/*
2469	* Shared mode is trickier as we should try archive the same locality as
2470	* in bound mode, but smartly make use of non-full chunks allocated by
2471	* other VMs if we're low on memory.
2472	*/
2473	else
2474	{
2475	/* Pick the most optimal pages first. */
2476	iPage = gmmR0AllocatePagesAssociatedWithVM(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2477	if (iPage < cPages)
2478	{
2479	/* Maybe we should try getting pages from chunks "belonging" to
2480	other VMs before allocating more chunks? */
2481	if (gmmR0ShouldAllocatePagesInOtherChunks(pGVM))
2482	iPage = gmmR0AllocatePagesFromSameNode(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2483
2484	/* Allocate memory from empty chunks. */
2485	if (iPage < cPages)
2486	iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2487
2488	/* Grab empty shared chunks. */
2489	if (iPage < cPages)
2490	iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(pGMM, pGVM, &pGMM->Shared, iPage, cPages, paPages);
2491
2492	/*
2493	* Ok, try allocate new chunks.
2494	*/
2495	if (iPage < cPages)
2496	{
2497	do
2498	rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGMM->PrivateX, cPages, paPages, &iPage);
2499	while (iPage < cPages && RT_SUCCESS(rc));
2500
2501	/* If the host is out of memory, take whatever we can get. */
2502	if ( rc == VERR_NO_MEMORY
2503	&& pGMM->PrivateX.cFreePages + pGMM->Shared.cFreePages >= cPages - iPage)
2504	{
2505	iPage = gmmR0AllocatePagesIndiscriminately(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
2506	if (iPage < cPages)
2507	iPage = gmmR0AllocatePagesIndiscriminately(pGMM, pGVM, &pGMM->Shared, iPage, cPages, paPages);
2508	AssertRelease(iPage == cPages);
2509	rc = VINF_SUCCESS;
2510	}
2511	}
2512	}
2513	}
2514
2515	/*
2516	* Clean up on failure. Since this is bound to be a low-memory condition
2517	* we will give back any empty chunks that might be hanging around.
2518	*/
2519	if (RT_FAILURE(rc))
2520	{
2521	/* Update the statistics. */
2522	pGVM->gmm.s.cPrivatePages -= cPages;
2523	pGMM->cAllocatedPages -= cPages - iPage;
2524	switch (enmAccount)
2525	{
2526	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= cPages; break;
2527	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= cPages; break;
2528	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= cPages; break;
2529	default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2530	}
2531
2532	/* Release the pages. */
2533	while (iPage-- > 0)
2534	{
2535	uint32_t idPage = paPages[iPage].idPage;
2536	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2537	if (RT_LIKELY(pPage))
2538	{
2539	Assert(GMM_PAGE_IS_PRIVATE(pPage));
2540	Assert(pPage->Private.hGVM == pGVM->hSelf);
2541	gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
2542	}
2543	else
2544	AssertMsgFailed(("idPage=%#x\n", idPage));
2545	}
2546
2547	/* Free empty chunks. */
2548	/** @todo */
2549	}
2550	return VINF_SUCCESS;
2551	}
2552
2553
2554	/**
2555	* Updates the previous allocations and allocates more pages.
2556	*
2557	* The handy pages are always taken from the 'base' memory account.
2558	* The allocated pages are not cleared and will contains random garbage.
2559	*
2560	* @returns VBox status code:
2561	* @retval VINF_SUCCESS on success.
2562	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2563	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
2564	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
2565	* private page.
2566	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
2567	* shared page.
2568	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
2569	* owned by the VM.
2570	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2571	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2572	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2573	* that is we're trying to allocate more than we've reserved.
2574	*
2575	* @param pVM Pointer to the shared VM structure.
2576	* @param idCpu VCPU id
2577	* @param cPagesToUpdate The number of pages to update (starting from the head).
2578	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2579	* @param paPages The array of page descriptors.
2580	* See GMMPAGEDESC for details on what is expected on input.
2581	* @thread EMT.
2582	*/
2583	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2584	{
2585	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2586	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2587
2588	/*
2589	* Validate, get basics and take the semaphore.
2590	* (This is a relatively busy path, so make predictions where possible.)
2591	*/
2592	PGMM pGMM;
2593	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2594	PGVM pGVM;
2595	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2596	if (RT_FAILURE(rc))
2597	return rc;
2598
2599	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2600	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2601	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2602	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2603	VERR_INVALID_PARAMETER);
2604
2605	unsigned iPage = 0;
2606	for (; iPage < cPagesToUpdate; iPage++)
2607	{
2608	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2609	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2610	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2611	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2612	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2613	VERR_INVALID_PARAMETER);
2614	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2615	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2616	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2617	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2618	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2619	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2620	}
2621
2622	for (; iPage < cPagesToAlloc; iPage++)
2623	{
2624	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2625	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2626	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2627	}
2628
2629	gmmR0MutexAcquire(pGMM);
2630	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2631	{
2632	/* No allocations before the initial reservation has been made! */
2633	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2634	&& pGVM->gmm.s.Reserved.cFixedPages
2635	&& pGVM->gmm.s.Reserved.cShadowPages))
2636	{
2637	/*
2638	* Perform the updates.
2639	* Stop on the first error.
2640	*/
2641	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2642	{
2643	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2644	{
2645	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2646	if (RT_LIKELY(pPage))
2647	{
2648	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2649	{
2650	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2651	{
2652	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2653	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2654	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2655	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2656	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2657	/* else: NIL_RTHCPHYS nothing */
2658
2659	paPages[iPage].idPage = NIL_GMM_PAGEID;
2660	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2661	}
2662	else
2663	{
2664	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2665	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2666	rc = VERR_GMM_NOT_PAGE_OWNER;
2667	break;
2668	}
2669	}
2670	else
2671	{
2672	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage, pPage->Common.u2State));
2673	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2674	break;
2675	}
2676	}
2677	else
2678	{
2679	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2680	rc = VERR_GMM_PAGE_NOT_FOUND;
2681	break;
2682	}
2683	}
2684
2685	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2686	{
2687	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2688	if (RT_LIKELY(pPage))
2689	{
2690	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2691	{
2692	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2693	Assert(pPage->Shared.cRefs);
2694	Assert(pGVM->gmm.s.cSharedPages);
2695	Assert(pGVM->gmm.s.Allocated.cBasePages);
2696
2697	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2698	pGVM->gmm.s.cSharedPages--;
2699	pGVM->gmm.s.Allocated.cBasePages--;
2700	if (!--pPage->Shared.cRefs)
2701	gmmR0FreeSharedPage(pGMM, pGVM, paPages[iPage].idSharedPage, pPage);
2702	else
2703	{
2704	Assert(pGMM->cDuplicatePages);
2705	pGMM->cDuplicatePages--;
2706	}
2707
2708	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2709	}
2710	else
2711	{
2712	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2713	rc = VERR_GMM_PAGE_NOT_SHARED;
2714	break;
2715	}
2716	}
2717	else
2718	{
2719	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2720	rc = VERR_GMM_PAGE_NOT_FOUND;
2721	break;
2722	}
2723	}
2724	}
2725
2726	/*
2727	* Join paths with GMMR0AllocatePages for the allocation.
2728	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2729	*/
2730	rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2731	}
2732	else
2733	rc = VERR_WRONG_ORDER;
2734	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2735	}
2736	else
2737	rc = VERR_INTERNAL_ERROR_5;
2738	gmmR0MutexRelease(pGMM);
2739	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2740	return rc;
2741	}
2742
2743
2744	/**
2745	* Allocate one or more pages.
2746	*
2747	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2748	* The allocated pages are not cleared and will contains random garbage.
2749	*
2750	* @returns VBox status code:
2751	* @retval VINF_SUCCESS on success.
2752	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2753	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2754	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2755	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2756	* that is we're trying to allocate more than we've reserved.
2757	*
2758	* @param pVM Pointer to the shared VM structure.
2759	* @param idCpu VCPU id
2760	* @param cPages The number of pages to allocate.
2761	* @param paPages Pointer to the page descriptors.
2762	* See GMMPAGEDESC for details on what is expected on input.
2763	* @param enmAccount The account to charge.
2764	*
2765	* @thread EMT.
2766	*/
2767	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2768	{
2769	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2770
2771	/*
2772	* Validate, get basics and take the semaphore.
2773	*/
2774	PGMM pGMM;
2775	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2776	PGVM pGVM;
2777	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2778	if (RT_FAILURE(rc))
2779	return rc;
2780
2781	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2782	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2783	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2784
2785	for (unsigned iPage = 0; iPage < cPages; iPage++)
2786	{
2787	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2788	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2789	\|\| ( enmAccount == GMMACCOUNT_BASE
2790	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2791	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2792	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2793	VERR_INVALID_PARAMETER);
2794	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2795	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2796	}
2797
2798	gmmR0MutexAcquire(pGMM);
2799	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2800	{
2801
2802	/* No allocations before the initial reservation has been made! */
2803	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2804	&& pGVM->gmm.s.Reserved.cFixedPages
2805	&& pGVM->gmm.s.Reserved.cShadowPages))
2806	rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPages, paPages, enmAccount);
2807	else
2808	rc = VERR_WRONG_ORDER;
2809	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2810	}
2811	else
2812	rc = VERR_INTERNAL_ERROR_5;
2813	gmmR0MutexRelease(pGMM);
2814	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2815	return rc;
2816	}
2817
2818
2819	/**
2820	* VMMR0 request wrapper for GMMR0AllocatePages.
2821	*
2822	* @returns see GMMR0AllocatePages.
2823	* @param pVM Pointer to the shared VM structure.
2824	* @param idCpu VCPU id
2825	* @param pReq The request packet.
2826	*/
2827	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2828	{
2829	/*
2830	* Validate input and pass it on.
2831	*/
2832	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2833	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2834	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2835	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2836	VERR_INVALID_PARAMETER);
2837	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2838	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2839	VERR_INVALID_PARAMETER);
2840
2841	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2842	}
2843
2844
2845	/**
2846	* Allocate a large page to represent guest RAM
2847	*
2848	* The allocated pages are not cleared and will contains random garbage.
2849	*
2850	* @returns VBox status code:
2851	* @retval VINF_SUCCESS on success.
2852	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2853	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2854	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2855	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2856	* that is we're trying to allocate more than we've reserved.
2857	* @returns see GMMR0AllocatePages.
2858	* @param pVM Pointer to the shared VM structure.
2859	* @param idCpu VCPU id
2860	* @param cbPage Large page size
2861	*/
2862	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2863	{
2864	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2865
2866	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2867	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2868	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2869
2870	/*
2871	* Validate, get basics and take the semaphore.
2872	*/
2873	PGMM pGMM;
2874	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2875	PGVM pGVM;
2876	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2877	if (RT_FAILURE(rc))
2878	return rc;
2879
2880	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2881	if (pGMM->fLegacyAllocationMode)
2882	return VERR_NOT_SUPPORTED;
2883
2884	*pHCPhys = NIL_RTHCPHYS;
2885	*pIdPage = NIL_GMM_PAGEID;
2886
2887	gmmR0MutexAcquire(pGMM);
2888	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2889	{
2890	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2891	if (RT_UNLIKELY( pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages
2892	> pGVM->gmm.s.Reserved.cBasePages))
2893	{
2894	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2895	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2896	gmmR0MutexRelease(pGMM);
2897	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2898	}
2899
2900	/*
2901	* Allocate a new large page chunk.
2902	*
2903	* Note! We leave the giant GMM lock temporarily as the allocation might
2904	* take a long time. gmmR0RegisterChunk will retake it (ugly).
2905	*/
2906	AssertCompile(GMM_CHUNK_SIZE == _2M);
2907	gmmR0MutexRelease(pGMM);
2908
2909	RTR0MEMOBJ hMemObj;
2910	rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
2911	if (RT_SUCCESS(rc))
2912	{
2913	PGMMCHUNKFREESET pSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
2914	PGMMCHUNK pChunk;
2915	rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
2916	if (RT_SUCCESS(rc))
2917	{
2918	/*
2919	* Allocate all the pages in the chunk.
2920	*/
2921	/* Unlink the new chunk from the free list. */
2922	gmmR0UnlinkChunk(pChunk);
2923
2924	/** @todo rewrite this to skip the looping. */
2925	/* Allocate all pages. */
2926	GMMPAGEDESC PageDesc;
2927	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2928
2929	/* Return the first page as we'll use the whole chunk as one big page. */
2930	*pIdPage = PageDesc.idPage;
2931	*pHCPhys = PageDesc.HCPhysGCPhys;
2932
2933	for (unsigned i = 1; i < cPages; i++)
2934	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2935
2936	/* Update accounting. */
2937	pGVM->gmm.s.Allocated.cBasePages += cPages;
2938	pGVM->gmm.s.cPrivatePages += cPages;
2939	pGMM->cAllocatedPages += cPages;
2940
2941	gmmR0LinkChunk(pChunk, pSet);
2942	gmmR0MutexRelease(pGMM);
2943	}
2944	else
2945	RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
2946	}
2947	}
2948	else
2949	{
2950	gmmR0MutexRelease(pGMM);
2951	rc = VERR_INTERNAL_ERROR_5;
2952	}
2953
2954	LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
2955	return rc;
2956	}
2957
2958
2959	/**
2960	* Free a large page
2961	*
2962	* @returns VBox status code:
2963	* @param pVM Pointer to the shared VM structure.
2964	* @param idCpu VCPU id
2965	* @param idPage Large page id
2966	*/
2967	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2968	{
2969	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2970
2971	/*
2972	* Validate, get basics and take the semaphore.
2973	*/
2974	PGMM pGMM;
2975	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2976	PGVM pGVM;
2977	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2978	if (RT_FAILURE(rc))
2979	return rc;
2980
2981	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2982	if (pGMM->fLegacyAllocationMode)
2983	return VERR_NOT_SUPPORTED;
2984
2985	gmmR0MutexAcquire(pGMM);
2986	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2987	{
2988	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2989
2990	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2991	{
2992	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2993	gmmR0MutexRelease(pGMM);
2994	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2995	}
2996
2997	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2998	if (RT_LIKELY( pPage
2999	&& GMM_PAGE_IS_PRIVATE(pPage)))
3000	{
3001	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3002	Assert(pChunk);
3003	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3004	Assert(pChunk->cPrivate > 0);
3005
3006	/* Release the memory immediately. */
3007	gmmR0FreeChunk(pGMM, NULL, pChunk, false /fRelaxedSem/); /** @todo this can be relaxed too! */
3008
3009	/* Update accounting. */
3010	pGVM->gmm.s.Allocated.cBasePages -= cPages;
3011	pGVM->gmm.s.cPrivatePages -= cPages;
3012	pGMM->cAllocatedPages -= cPages;
3013	}
3014	else
3015	rc = VERR_GMM_PAGE_NOT_FOUND;
3016	}
3017	else
3018	rc = VERR_INTERNAL_ERROR_5;
3019
3020	gmmR0MutexRelease(pGMM);
3021	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
3022	return rc;
3023	}
3024
3025
3026	/**
3027	* VMMR0 request wrapper for GMMR0FreeLargePage.
3028	*
3029	* @returns see GMMR0FreeLargePage.
3030	* @param pVM Pointer to the shared VM structure.
3031	* @param idCpu VCPU id
3032	* @param pReq The request packet.
3033	*/
3034	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
3035	{
3036	/*
3037	* Validate input and pass it on.
3038	*/
3039	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3040	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3041	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
3042	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
3043	VERR_INVALID_PARAMETER);
3044
3045	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
3046	}
3047
3048
3049	/**
3050	* Frees a chunk, giving it back to the host OS.
3051	*
3052	* @param pGMM Pointer to the GMM instance.
3053	* @param pGVM This is set when called from GMMR0CleanupVM so we can
3054	* unmap and free the chunk in one go.
3055	* @param pChunk The chunk to free.
3056	* @param fRelaxedSem Whether we can release the semaphore while doing the
3057	* freeing (@c true) or not.
3058	*/
3059	static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3060	{
3061	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
3062
3063	GMMR0CHUNKMTXSTATE MtxState;
3064	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3065
3066	/*
3067	* Cleanup hack! Unmap the chunk from the callers address space.
3068	* This shouldn't happen, so screw lock contention...
3069	*/
3070	if ( pChunk->cMappingsX
3071	&& !pGMM->fLegacyAllocationMode
3072	&& pGVM)
3073	gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3074
3075	/*
3076	* If there are current mappings of the chunk, then request the
3077	* VMs to unmap them. Reposition the chunk in the free list so
3078	* it won't be a likely candidate for allocations.
3079	*/
3080	if (pChunk->cMappingsX)
3081	{
3082	/** @todo R0 -> VM request */
3083	/* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
3084	Log(("gmmR0FreeChunk: chunk still has %d/%d mappings; don't free!\n", pChunk->cMappingsX));
3085	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3086	return false;
3087	}
3088
3089
3090	/*
3091	* Save and trash the handle.
3092	*/
3093	RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
3094	pChunk->hMemObj = NIL_RTR0MEMOBJ;
3095
3096	/*
3097	* Unlink it from everywhere.
3098	*/
3099	gmmR0UnlinkChunk(pChunk);
3100
3101	RTListNodeRemove(&pChunk->ListNode);
3102
3103	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
3104	Assert(pCore == &pChunk->Core); NOREF(pCore);
3105
3106	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
3107	if (pTlbe->pChunk == pChunk)
3108	{
3109	pTlbe->idChunk = NIL_GMM_CHUNKID;
3110	pTlbe->pChunk = NULL;
3111	}
3112
3113	Assert(pGMM->cChunks > 0);
3114	pGMM->cChunks--;
3115
3116	/*
3117	* Free the Chunk ID before dropping the locks and freeing the rest.
3118	*/
3119	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
3120	pChunk->Core.Key = NIL_GMM_CHUNKID;
3121
3122	pGMM->cFreedChunks++;
3123
3124	gmmR0ChunkMutexRelease(&MtxState, NULL);
3125	if (fRelaxedSem)
3126	gmmR0MutexRelease(pGMM);
3127
3128	RTMemFree(pChunk->paMappingsX);
3129	pChunk->paMappingsX = NULL;
3130
3131	RTMemFree(pChunk);
3132
3133	int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
3134	AssertLogRelRC(rc);
3135
3136	if (fRelaxedSem)
3137	gmmR0MutexAcquire(pGMM);
3138	return fRelaxedSem;
3139	}
3140
3141
3142	/**
3143	* Free page worker.
3144	*
3145	* The caller does all the statistic decrementing, we do all the incrementing.
3146	*
3147	* @param pGMM Pointer to the GMM instance data.
3148	* @param pGVM Pointer to the GVM instance.
3149	* @param pChunk Pointer to the chunk this page belongs to.
3150	* @param idPage The Page ID.
3151	* @param pPage Pointer to the page.
3152	*/
3153	static void gmmR0FreePageWorker(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
3154	{
3155	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
3156	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
3157
3158	/*
3159	* Put the page on the free list.
3160	*/
3161	pPage->u = 0;
3162	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
3163	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
3164	pPage->Free.iNext = pChunk->iFreeHead;
3165	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
3166
3167	/*
3168	* Update statistics (the cShared/cPrivate stats are up to date already),
3169	* and relink the chunk if necessary.
3170	*/
3171	unsigned const cFree = pChunk->cFree;
3172	if ( !cFree
3173	\|\| gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
3174	{
3175	gmmR0UnlinkChunk(pChunk);
3176	pChunk->cFree++;
3177	gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
3178	}
3179	else
3180	{
3181	pChunk->cFree = cFree + 1;
3182	pChunk->pSet->cFreePages++;
3183	}
3184
3185	/*
3186	* If the chunk becomes empty, consider giving memory back to the host OS.
3187	*
3188	* The current strategy is to try give it back if there are other chunks
3189	* in this free list, meaning if there are at least 240 free pages in this
3190	* category. Note that since there are probably mappings of the chunk,
3191	* it won't be freed up instantly, which probably screws up this logic
3192	* a bit...
3193	*/
3194	/** @todo Do this on the way out. */
3195	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
3196	&& pChunk->pFreeNext
3197	&& pChunk->pFreePrev /** @todo this is probably misfiring, see reset... */
3198	&& !pGMM->fLegacyAllocationMode))
3199	gmmR0FreeChunk(pGMM, NULL, pChunk, false);
3200
3201	}
3202
3203
3204	/**
3205	* Frees a shared page, the page is known to exist and be valid and such.
3206	*
3207	* @param pGMM Pointer to the GMM instance.
3208	* @param pGVM Pointer to the GVM instance.
3209	* @param idPage The Page ID
3210	* @param pPage The page structure.
3211	*/
3212	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3213	{
3214	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3215	Assert(pChunk);
3216	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3217	Assert(pChunk->cShared > 0);
3218	Assert(pGMM->cSharedPages > 0);
3219	Assert(pGMM->cAllocatedPages > 0);
3220	Assert(!pPage->Shared.cRefs);
3221
3222	pChunk->cShared--;
3223	pGMM->cAllocatedPages--;
3224	pGMM->cSharedPages--;
3225	gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3226	}
3227
3228
3229	/**
3230	* Frees a private page, the page is known to exist and be valid and such.
3231	*
3232	* @param pGMM Pointer to the GMM instance.
3233	* @param pGVM Pointer to the GVM instance.
3234	* @param idPage The Page ID
3235	* @param pPage The page structure.
3236	*/
3237	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
3238	{
3239	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
3240	Assert(pChunk);
3241	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
3242	Assert(pChunk->cPrivate > 0);
3243	Assert(pGMM->cAllocatedPages > 0);
3244
3245	pChunk->cPrivate--;
3246	pGMM->cAllocatedPages--;
3247	gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
3248	}
3249
3250
3251	/**
3252	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
3253	*
3254	* @returns VBox status code:
3255	* @retval xxx
3256	*
3257	* @param pGMM Pointer to the GMM instance data.
3258	* @param pGVM Pointer to the shared VM structure.
3259	* @param cPages The number of pages to free.
3260	* @param paPages Pointer to the page descriptors.
3261	* @param enmAccount The account this relates to.
3262	*/
3263	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3264	{
3265	/*
3266	* Check that the request isn't impossible wrt to the account status.
3267	*/
3268	switch (enmAccount)
3269	{
3270	case GMMACCOUNT_BASE:
3271	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
3272	{
3273	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
3274	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3275	}
3276	break;
3277	case GMMACCOUNT_SHADOW:
3278	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
3279	{
3280	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
3281	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3282	}
3283	break;
3284	case GMMACCOUNT_FIXED:
3285	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
3286	{
3287	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
3288	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3289	}
3290	break;
3291	default:
3292	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3293	}
3294
3295	/*
3296	* Walk the descriptors and free the pages.
3297	*
3298	* Statistics (except the account) are being updated as we go along,
3299	* unlike the alloc code. Also, stop on the first error.
3300	*/
3301	int rc = VINF_SUCCESS;
3302	uint32_t iPage;
3303	for (iPage = 0; iPage < cPages; iPage++)
3304	{
3305	uint32_t idPage = paPages[iPage].idPage;
3306	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
3307	if (RT_LIKELY(pPage))
3308	{
3309	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
3310	{
3311	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
3312	{
3313	Assert(pGVM->gmm.s.cPrivatePages);
3314	pGVM->gmm.s.cPrivatePages--;
3315	gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
3316	}
3317	else
3318	{
3319	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
3320	pPage->Private.hGVM, pGVM->hSelf));
3321	rc = VERR_GMM_NOT_PAGE_OWNER;
3322	break;
3323	}
3324	}
3325	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
3326	{
3327	Assert(pGVM->gmm.s.cSharedPages);
3328	pGVM->gmm.s.cSharedPages--;
3329	Assert(pPage->Shared.cRefs);
3330	if (!--pPage->Shared.cRefs)
3331	gmmR0FreeSharedPage(pGMM, pGVM, idPage, pPage);
3332	else
3333	{
3334	Assert(pGMM->cDuplicatePages);
3335	pGMM->cDuplicatePages--;
3336	}
3337	}
3338	else
3339	{
3340	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
3341	rc = VERR_GMM_PAGE_ALREADY_FREE;
3342	break;
3343	}
3344	}
3345	else
3346	{
3347	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
3348	rc = VERR_GMM_PAGE_NOT_FOUND;
3349	break;
3350	}
3351	paPages[iPage].idPage = NIL_GMM_PAGEID;
3352	}
3353
3354	/*
3355	* Update the account.
3356	*/
3357	switch (enmAccount)
3358	{
3359	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
3360	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
3361	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
3362	default:
3363	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
3364	}
3365
3366	/*
3367	* Any threshold stuff to be done here?
3368	*/
3369
3370	return rc;
3371	}
3372
3373
3374	/**
3375	* Free one or more pages.
3376	*
3377	* This is typically used at reset time or power off.
3378	*
3379	* @returns VBox status code:
3380	* @retval xxx
3381	*
3382	* @param pVM Pointer to the shared VM structure.
3383	* @param idCpu VCPU id
3384	* @param cPages The number of pages to allocate.
3385	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
3386	* @param enmAccount The account this relates to.
3387	* @thread EMT.
3388	*/
3389	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
3390	{
3391	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
3392
3393	/*
3394	* Validate input and get the basics.
3395	*/
3396	PGMM pGMM;
3397	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3398	PGVM pGVM;
3399	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3400	if (RT_FAILURE(rc))
3401	return rc;
3402
3403	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
3404	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
3405	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
3406
3407	for (unsigned iPage = 0; iPage < cPages; iPage++)
3408	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
3409	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
3410	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
3411
3412	/*
3413	* Take the semaphore and call the worker function.
3414	*/
3415	gmmR0MutexAcquire(pGMM);
3416	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3417	{
3418	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
3419	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3420	}
3421	else
3422	rc = VERR_INTERNAL_ERROR_5;
3423	gmmR0MutexRelease(pGMM);
3424	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
3425	return rc;
3426	}
3427
3428
3429	/**
3430	* VMMR0 request wrapper for GMMR0FreePages.
3431	*
3432	* @returns see GMMR0FreePages.
3433	* @param pVM Pointer to the shared VM structure.
3434	* @param idCpu VCPU id
3435	* @param pReq The request packet.
3436	*/
3437	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
3438	{
3439	/*
3440	* Validate input and pass it on.
3441	*/
3442	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3443	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3444	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
3445	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
3446	VERR_INVALID_PARAMETER);
3447	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
3448	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
3449	VERR_INVALID_PARAMETER);
3450
3451	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
3452	}
3453
3454
3455	/**
3456	* Report back on a memory ballooning request.
3457	*
3458	* The request may or may not have been initiated by the GMM. If it was initiated
3459	* by the GMM it is important that this function is called even if no pages were
3460	* ballooned.
3461	*
3462	* @returns VBox status code:
3463	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
3464	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
3465	* @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
3466	* indicating that we won't necessarily have sufficient RAM to boot
3467	* the VM again and that it should pause until this changes (we'll try
3468	* balloon some other VM). (For standard deflate we have little choice
3469	* but to hope the VM won't use the memory that was returned to it.)
3470	*
3471	* @param pVM Pointer to the shared VM structure.
3472	* @param idCpu VCPU id
3473	* @param enmAction Inflate/deflate/reset
3474	* @param cBalloonedPages The number of pages that was ballooned.
3475	*
3476	* @thread EMT.
3477	*/
3478	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
3479	{
3480	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
3481	pVM, enmAction, cBalloonedPages));
3482
3483	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
3484
3485	/*
3486	* Validate input and get the basics.
3487	*/
3488	PGMM pGMM;
3489	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3490	PGVM pGVM;
3491	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3492	if (RT_FAILURE(rc))
3493	return rc;
3494
3495	/*
3496	* Take the semaphore and do some more validations.
3497	*/
3498	gmmR0MutexAcquire(pGMM);
3499	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3500	{
3501	switch (enmAction)
3502	{
3503	case GMMBALLOONACTION_INFLATE:
3504	{
3505	if (RT_LIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cBalloonedPages <= pGVM->gmm.s.Reserved.cBasePages))
3506	{
3507	/*
3508	* Record the ballooned memory.
3509	*/
3510	pGMM->cBalloonedPages += cBalloonedPages;
3511	if (pGVM->gmm.s.cReqBalloonedPages)
3512	{
3513	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
3514	AssertFailed();
3515
3516	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3517	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
3518	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
3519	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
3520	}
3521	else
3522	{
3523	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
3524	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3525	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3526	}
3527	}
3528	else
3529	{
3530	Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
3531	pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cBalloonedPages, pGVM->gmm.s.Reserved.cBasePages));
3532	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
3533	}
3534	break;
3535	}
3536
3537	case GMMBALLOONACTION_DEFLATE:
3538	{
3539	/* Deflate. */
3540	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
3541	{
3542	/*
3543	* Record the ballooned memory.
3544	*/
3545	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3546	pGMM->cBalloonedPages -= cBalloonedPages;
3547	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3548	if (pGVM->gmm.s.cReqDeflatePages)
3549	{
3550	AssertFailed(); /* This is path is for later. */
3551	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3552	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3553
3554	/*
3555	* Anything we need to do here now when the request has been completed?
3556	*/
3557	pGVM->gmm.s.cReqDeflatePages = 0;
3558	}
3559	else
3560	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3561	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3562	}
3563	else
3564	{
3565	Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.cBalloonedPages, cBalloonedPages));
3566	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3567	}
3568	break;
3569	}
3570
3571	case GMMBALLOONACTION_RESET:
3572	{
3573	/* Reset to an empty balloon. */
3574	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3575
3576	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3577	pGVM->gmm.s.cBalloonedPages = 0;
3578	break;
3579	}
3580
3581	default:
3582	rc = VERR_INVALID_PARAMETER;
3583	break;
3584	}
3585	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3586	}
3587	else
3588	rc = VERR_INTERNAL_ERROR_5;
3589
3590	gmmR0MutexRelease(pGMM);
3591	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3592	return rc;
3593	}
3594
3595
3596	/**
3597	* VMMR0 request wrapper for GMMR0BalloonedPages.
3598	*
3599	* @returns see GMMR0BalloonedPages.
3600	* @param pVM Pointer to the shared VM structure.
3601	* @param idCpu VCPU id
3602	* @param pReq The request packet.
3603	*/
3604	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3605	{
3606	/*
3607	* Validate input and pass it on.
3608	*/
3609	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3610	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3611	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3612	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3613	VERR_INVALID_PARAMETER);
3614
3615	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3616	}
3617
3618	/**
3619	* Return memory statistics for the hypervisor
3620	*
3621	* @returns VBox status code:
3622	* @param pVM Pointer to the shared VM structure.
3623	* @param pReq The request packet.
3624	*/
3625	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3626	{
3627	/*
3628	* Validate input and pass it on.
3629	*/
3630	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3631	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3632	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3633	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3634	VERR_INVALID_PARAMETER);
3635
3636	/*
3637	* Validate input and get the basics.
3638	*/
3639	PGMM pGMM;
3640	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3641	pReq->cAllocPages = pGMM->cAllocatedPages;
3642	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3643	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3644	pReq->cMaxPages = pGMM->cMaxPages;
3645	pReq->cSharedPages = pGMM->cDuplicatePages;
3646	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3647
3648	return VINF_SUCCESS;
3649	}
3650
3651	/**
3652	* Return memory statistics for the VM
3653	*
3654	* @returns VBox status code:
3655	* @param pVM Pointer to the shared VM structure.
3656	* @parma idCpu Cpu id.
3657	* @param pReq The request packet.
3658	*/
3659	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3660	{
3661	/*
3662	* Validate input and pass it on.
3663	*/
3664	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3665	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3666	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3667	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3668	VERR_INVALID_PARAMETER);
3669
3670	/*
3671	* Validate input and get the basics.
3672	*/
3673	PGMM pGMM;
3674	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3675	PGVM pGVM;
3676	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3677	if (RT_FAILURE(rc))
3678	return rc;
3679
3680	/*
3681	* Take the semaphore and do some more validations.
3682	*/
3683	gmmR0MutexAcquire(pGMM);
3684	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3685	{
3686	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3687	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3688	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3689	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3690	}
3691	else
3692	rc = VERR_INTERNAL_ERROR_5;
3693
3694	gmmR0MutexRelease(pGMM);
3695	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3696	return rc;
3697	}
3698
3699
3700	/**
3701	* Worker for gmmR0UnmapChunk and gmmr0FreeChunk.
3702	*
3703	* Don't call this in legacy allocation mode!
3704	*
3705	* @returns VBox status code.
3706	* @param pGMM Pointer to the GMM instance data.
3707	* @param pGVM Pointer to the Global VM structure.
3708	* @param pChunk Pointer to the chunk to be unmapped.
3709	*/
3710	static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3711	{
3712	Assert(!pGMM->fLegacyAllocationMode);
3713
3714	/*
3715	* Find the mapping and try unmapping it.
3716	*/
3717	uint32_t cMappings = pChunk->cMappingsX;
3718	for (uint32_t i = 0; i < cMappings; i++)
3719	{
3720	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3721	if (pChunk->paMappingsX[i].pGVM == pGVM)
3722	{
3723	/* unmap */
3724	int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
3725	if (RT_SUCCESS(rc))
3726	{
3727	/* update the record. */
3728	cMappings--;
3729	if (i < cMappings)
3730	pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
3731	pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
3732	pChunk->paMappingsX[cMappings].pGVM = NULL;
3733	Assert(pChunk->cMappingsX - 1U == cMappings);
3734	pChunk->cMappingsX = cMappings;
3735	}
3736
3737	return rc;
3738	}
3739	}
3740
3741	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3742	return VERR_GMM_CHUNK_NOT_MAPPED;
3743	}
3744
3745
3746	/**
3747	* Unmaps a chunk previously mapped into the address space of the current process.
3748	*
3749	* @returns VBox status code.
3750	* @param pGMM Pointer to the GMM instance data.
3751	* @param pGVM Pointer to the Global VM structure.
3752	* @param pChunk Pointer to the chunk to be unmapped.
3753	*/
3754	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
3755	{
3756	if (!pGMM->fLegacyAllocationMode)
3757	{
3758	/*
3759	* Lock the chunk and if possible leave the giant GMM lock.
3760	*/
3761	GMMR0CHUNKMTXSTATE MtxState;
3762	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3763	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3764	if (RT_SUCCESS(rc))
3765	{
3766	rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
3767	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3768	}
3769	return rc;
3770	}
3771
3772	if (pChunk->hGVM == pGVM->hSelf)
3773	return VINF_SUCCESS;
3774
3775	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3776	return VERR_GMM_CHUNK_NOT_MAPPED;
3777	}
3778
3779
3780	/**
3781	* Worker for gmmR0MapChunk.
3782	*
3783	* @returns VBox status code.
3784	* @param pGMM Pointer to the GMM instance data.
3785	* @param pGVM Pointer to the Global VM structure.
3786	* @param pChunk Pointer to the chunk to be mapped.
3787	* @param ppvR3 Where to store the ring-3 address of the mapping.
3788	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3789	* contain the address of the existing mapping.
3790	*/
3791	static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3792	{
3793	/*
3794	* If we're in legacy mode this is simple.
3795	*/
3796	if (pGMM->fLegacyAllocationMode)
3797	{
3798	if (pChunk->hGVM != pGVM->hSelf)
3799	{
3800	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3801	return VERR_GMM_CHUNK_NOT_FOUND;
3802	}
3803
3804	*ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
3805	return VINF_SUCCESS;
3806	}
3807
3808	/*
3809	* Check to see if the chunk is already mapped.
3810	*/
3811	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3812	{
3813	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3814	if (pChunk->paMappingsX[i].pGVM == pGVM)
3815	{
3816	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3817	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3818	#ifdef VBOX_WITH_PAGE_SHARING
3819	/* The ring-3 chunk cache can be out of sync; don't fail. */
3820	return VINF_SUCCESS;
3821	#else
3822	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3823	#endif
3824	}
3825	}
3826
3827	/*
3828	* Do the mapping.
3829	*/
3830	RTR0MEMOBJ hMapObj;
3831	int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3832	if (RT_SUCCESS(rc))
3833	{
3834	/* reallocate the array? assumes few users per chunk (usually one). */
3835	unsigned iMapping = pChunk->cMappingsX;
3836	if ( iMapping <= 3
3837	\|\| (iMapping & 3) == 0)
3838	{
3839	unsigned cNewSize = iMapping <= 3
3840	? iMapping + 1
3841	: iMapping + 4;
3842	Assert(cNewSize < 4 \|\| RT_ALIGN_32(cNewSize, 4) == cNewSize);
3843	if (RT_UNLIKELY(cNewSize > UINT16_MAX))
3844	{
3845	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3846	return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
3847	}
3848
3849	void pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize sizeof(pChunk->paMappingsX[0]));
3850	if (RT_UNLIKELY(!pvMappings))
3851	{
3852	rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
3853	return VERR_NO_MEMORY;
3854	}
3855	pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
3856	}
3857
3858	/* insert new entry */
3859	pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
3860	pChunk->paMappingsX[iMapping].pGVM = pGVM;
3861	Assert(pChunk->cMappingsX == iMapping);
3862	pChunk->cMappingsX = iMapping + 1;
3863
3864	*ppvR3 = RTR0MemObjAddressR3(hMapObj);
3865	}
3866
3867	return rc;
3868	}
3869
3870
3871	/**
3872	* Maps a chunk into the user address space of the current process.
3873	*
3874	* @returns VBox status code.
3875	* @param pGMM Pointer to the GMM instance data.
3876	* @param pGVM Pointer to the Global VM structure.
3877	* @param pChunk Pointer to the chunk to be mapped.
3878	* @param fRelaxedSem Whether we can release the semaphore while doing the
3879	* mapping (@c true) or not.
3880	* @param ppvR3 Where to store the ring-3 address of the mapping.
3881	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3882	* contain the address of the existing mapping.
3883	*/
3884	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
3885	{
3886	/*
3887	* Take the chunk lock and leave the giant GMM lock when possible, then
3888	* call the worker function.
3889	*/
3890	GMMR0CHUNKMTXSTATE MtxState;
3891	int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
3892	fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
3893	if (RT_SUCCESS(rc))
3894	{
3895	rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
3896	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3897	}
3898
3899	return rc;
3900	}
3901
3902
3903
3904	/**
3905	* Check if a chunk is mapped into the specified VM
3906	*
3907	* @returns mapped yes/no
3908	* @param pGMM Pointer to the GMM instance.
3909	* @param pGVM Pointer to the Global VM structure.
3910	* @param pChunk Pointer to the chunk to be mapped.
3911	* @param ppvR3 Where to store the ring-3 address of the mapping.
3912	*/
3913	static int gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3914	{
3915	GMMR0CHUNKMTXSTATE MtxState;
3916	gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
3917	for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
3918	{
3919	Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
3920	if (pChunk->paMappingsX[i].pGVM == pGVM)
3921	{
3922	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
3923	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3924	return true;
3925	}
3926	}
3927	*ppvR3 = NULL;
3928	gmmR0ChunkMutexRelease(&MtxState, pChunk);
3929	return false;
3930	}
3931
3932
3933	/**
3934	* Map a chunk and/or unmap another chunk.
3935	*
3936	* The mapping and unmapping applies to the current process.
3937	*
3938	* This API does two things because it saves a kernel call per mapping when
3939	* when the ring-3 mapping cache is full.
3940	*
3941	* @returns VBox status code.
3942	* @param pVM The VM.
3943	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3944	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3945	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3946	* @thread EMT
3947	*/
3948	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3949	{
3950	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3951	pVM, idChunkMap, idChunkUnmap, ppvR3));
3952
3953	/*
3954	* Validate input and get the basics.
3955	*/
3956	PGMM pGMM;
3957	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3958	PGVM pGVM;
3959	int rc = GVMMR0ByVM(pVM, &pGVM);
3960	if (RT_FAILURE(rc))
3961	return rc;
3962
3963	AssertCompile(NIL_GMM_CHUNKID == 0);
3964	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3965	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3966
3967	if ( idChunkMap == NIL_GMM_CHUNKID
3968	&& idChunkUnmap == NIL_GMM_CHUNKID)
3969	return VERR_INVALID_PARAMETER;
3970
3971	if (idChunkMap != NIL_GMM_CHUNKID)
3972	{
3973	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3974	*ppvR3 = NIL_RTR3PTR;
3975	}
3976
3977	/*
3978	* Take the semaphore and do the work.
3979	*
3980	* The unmapping is done last since it's easier to undo a mapping than
3981	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
3982	* that it pushes the user virtual address space to within a chunk of
3983	* it it's limits, so, no problem here.
3984	*/
3985	gmmR0MutexAcquire(pGMM);
3986	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3987	{
3988	PGMMCHUNK pMap = NULL;
3989	if (idChunkMap != NIL_GVM_HANDLE)
3990	{
3991	pMap = gmmR0GetChunk(pGMM, idChunkMap);
3992	if (RT_LIKELY(pMap))
3993	rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /fRelaxedSem/, ppvR3);
3994	else
3995	{
3996	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3997	rc = VERR_GMM_CHUNK_NOT_FOUND;
3998	}
3999	}
4000	/** @todo split this operation, the bail out might (theoretcially) not be
4001	* entirely safe. */
4002
4003	if ( idChunkUnmap != NIL_GMM_CHUNKID
4004	&& RT_SUCCESS(rc))
4005	{
4006	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
4007	if (RT_LIKELY(pUnmap))
4008	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /fRelaxedSem/);
4009	else
4010	{
4011	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
4012	rc = VERR_GMM_CHUNK_NOT_FOUND;
4013	}
4014
4015	if (RT_FAILURE(rc) && pMap)
4016	gmmR0UnmapChunk(pGMM, pGVM, pMap, false /fRelaxedSem/);
4017	}
4018
4019	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4020	}
4021	else
4022	rc = VERR_INTERNAL_ERROR_5;
4023	gmmR0MutexRelease(pGMM);
4024
4025	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
4026	return rc;
4027	}
4028
4029
4030	/**
4031	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
4032	*
4033	* @returns see GMMR0MapUnmapChunk.
4034	* @param pVM Pointer to the shared VM structure.
4035	* @param pReq The request packet.
4036	*/
4037	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
4038	{
4039	/*
4040	* Validate input and pass it on.
4041	*/
4042	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4043	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4044	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4045
4046	return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
4047	}
4048
4049
4050	/**
4051	* Legacy mode API for supplying pages.
4052	*
4053	* The specified user address points to a allocation chunk sized block that
4054	* will be locked down and used by the GMM when the GM asks for pages.
4055	*
4056	* @returns VBox status code.
4057	* @param pVM The VM.
4058	* @param idCpu VCPU id
4059	* @param pvR3 Pointer to the chunk size memory block to lock down.
4060	*/
4061	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
4062	{
4063	/*
4064	* Validate input and get the basics.
4065	*/
4066	PGMM pGMM;
4067	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4068	PGVM pGVM;
4069	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4070	if (RT_FAILURE(rc))
4071	return rc;
4072
4073	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
4074	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
4075
4076	if (!pGMM->fLegacyAllocationMode)
4077	{
4078	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
4079	return VERR_NOT_SUPPORTED;
4080	}
4081
4082	/*
4083	* Lock the memory and add it as new chunk with our hGVM.
4084	* (The GMM locking is done inside gmmR0RegisterChunk.)
4085	*/
4086	RTR0MEMOBJ MemObj;
4087	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
4088	if (RT_SUCCESS(rc))
4089	{
4090	rc = gmmR0RegisterChunk(pGMM, &pGVM->gmm.s.Private, MemObj, pGVM->hSelf, 0 /fChunkFlags/, NULL);
4091	if (RT_SUCCESS(rc))
4092	gmmR0MutexRelease(pGMM);
4093	else
4094	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
4095	}
4096
4097	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
4098	return rc;
4099	}
4100
4101
4102	typedef struct
4103	{
4104	PAVLGCPTRNODECORE pNode;
4105	char *pszModuleName;
4106	char *pszVersion;
4107	VBOXOSFAMILY enmGuestOS;
4108	} GMMFINDMODULEBYNAME, *PGMMFINDMODULEBYNAME;
4109
4110	/**
4111	* Tree enumeration callback for finding identical modules by name and version
4112	*/
4113	DECLCALLBACK(int) gmmR0CheckForIdenticalModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4114	{
4115	PGMMFINDMODULEBYNAME pInfo = (PGMMFINDMODULEBYNAME)pvUser;
4116	PGMMSHAREDMODULE pModule = (PGMMSHAREDMODULE)pNode;
4117
4118	if ( pInfo
4119	&& pInfo->enmGuestOS == pModule->enmGuestOS
4120	/** @todo replace with RTStrNCmp */
4121	&& !strcmp(pModule->szName, pInfo->pszModuleName)
4122	&& !strcmp(pModule->szVersion, pInfo->pszVersion))
4123	{
4124	pInfo->pNode = pNode;
4125	return 1; /* stop search */
4126	}
4127	return 0;
4128	}
4129
4130
4131	/**
4132	* Registers a new shared module for the VM
4133	*
4134	* @returns VBox status code.
4135	* @param pVM VM handle
4136	* @param idCpu VCPU id
4137	* @param enmGuestOS Guest OS type
4138	* @param pszModuleName Module name
4139	* @param pszVersion Module version
4140	* @param GCBaseAddr Module base address
4141	* @param cbModule Module size
4142	* @param cRegions Number of shared region descriptors
4143	* @param pRegions Shared region(s)
4144	*/
4145	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
4146	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
4147	{
4148	#ifdef VBOX_WITH_PAGE_SHARING
4149	/*
4150	* Validate input and get the basics.
4151	*/
4152	PGMM pGMM;
4153	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4154	PGVM pGVM;
4155	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4156	if (RT_FAILURE(rc))
4157	return rc;
4158
4159	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4160
4161	/*
4162	* Take the semaphore and do some more validations.
4163	*/
4164	gmmR0MutexAcquire(pGMM);
4165	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4166	{
4167	bool fNewModule = false;
4168
4169	/* Check if this module is already locally registered. */
4170	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4171	if (!pRecVM)
4172	{
4173	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULEPERVM, aRegions[cRegions]));
4174	if (!pRecVM)
4175	{
4176	AssertFailed();
4177	rc = VERR_NO_MEMORY;
4178	goto end;
4179	}
4180	pRecVM->Core.Key = GCBaseAddr;
4181	pRecVM->cRegions = cRegions;
4182
4183	/* Save the region data as they can differ between VMs (address space scrambling or simply different loading order) */
4184	for (unsigned i = 0; i < cRegions; i++)
4185	{
4186	pRecVM->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4187	pRecVM->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4188	pRecVM->aRegions[i].u32Alignment = 0;
4189	pRecVM->aRegions[i].paHCPhysPageID = NULL; /* unused */
4190	}
4191
4192	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
4193	Assert(ret);
4194
4195	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
4196	fNewModule = true;
4197	}
4198	else
4199	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
4200
4201	/* Check if this module is already globally registered. */
4202	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
4203	if ( !pGlobalModule
4204	&& enmGuestOS == VBOXOSFAMILY_Windows64)
4205	{
4206	/* Two identical copies of e.g. Win7 x64 will typically not have a similar virtual address space layout for dlls or kernel modules.
4207	* Try to find identical binaries based on name and version.
4208	*/
4209	GMMFINDMODULEBYNAME Info;
4210
4211	Info.pNode = NULL;
4212	Info.pszVersion = pszVersion;
4213	Info.pszModuleName = pszModuleName;
4214	Info.enmGuestOS = enmGuestOS;
4215
4216	Log(("Try to find identical module %s\n", pszModuleName));
4217	int ret = RTAvlGCPtrDoWithAll(&pGMM->pGlobalSharedModuleTree, true /* fFromLeft */, gmmR0CheckForIdenticalModule, &Info);
4218	if (ret == 1)
4219	{
4220	Assert(Info.pNode);
4221	pGlobalModule = (PGMMSHAREDMODULE)Info.pNode;
4222	Log(("Found identical module at %RGv\n", pGlobalModule->Core.Key));
4223	}
4224	}
4225
4226	if (!pGlobalModule)
4227	{
4228	Assert(fNewModule);
4229	Assert(!pRecVM->fCollision);
4230
4231	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
4232	if (!pGlobalModule)
4233	{
4234	AssertFailed();
4235	rc = VERR_NO_MEMORY;
4236	goto end;
4237	}
4238
4239	pGlobalModule->Core.Key = GCBaseAddr;
4240	pGlobalModule->cbModule = cbModule;
4241	/* Input limit already safe; no need to check again. */
4242	/** @todo replace with RTStrCopy */
4243	strcpy(pGlobalModule->szName, pszModuleName);
4244	strcpy(pGlobalModule->szVersion, pszVersion);
4245
4246	pGlobalModule->enmGuestOS = enmGuestOS;
4247	pGlobalModule->cRegions = cRegions;
4248
4249	for (unsigned i = 0; i < cRegions; i++)
4250	{
4251	Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
4252	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
4253	pGlobalModule->aRegions[i].cbRegion = RT_ALIGN_T(pRegions[i].cbRegion, PAGE_SIZE, uint32_t);
4254	pGlobalModule->aRegions[i].u32Alignment = 0;
4255	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
4256	}
4257
4258	/* Save reference. */
4259	pRecVM->pGlobalModule = pGlobalModule;
4260	pRecVM->fCollision = false;
4261	pGlobalModule->cUsers++;
4262	rc = VINF_SUCCESS;
4263
4264	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
4265	Assert(ret);
4266
4267	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
4268	}
4269	else
4270	{
4271	Assert(pGlobalModule->cUsers > 0);
4272
4273	/* Make sure the name and version are identical. */
4274	/** @todo replace with RTStrNCmp */
4275	if ( !strcmp(pGlobalModule->szName, pszModuleName)
4276	&& !strcmp(pGlobalModule->szVersion, pszVersion))
4277	{
4278	/* Save reference. */
4279	pRecVM->pGlobalModule = pGlobalModule;
4280	if ( fNewModule
4281	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */
4282	{
4283	pGlobalModule->cUsers++;
4284	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
4285	}
4286	pRecVM->fCollision = false;
4287	rc = VINF_SUCCESS;
4288	}
4289	else
4290	{
4291	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
4292	pRecVM->fCollision = true;
4293	rc = VINF_PGM_SHARED_MODULE_COLLISION;
4294	goto end;
4295	}
4296	}
4297
4298	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4299	}
4300	else
4301	rc = VERR_INTERNAL_ERROR_5;
4302
4303	end:
4304	gmmR0MutexRelease(pGMM);
4305	return rc;
4306	#else
4307	return VERR_NOT_IMPLEMENTED;
4308	#endif
4309	}
4310
4311
4312	/**
4313	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
4314	*
4315	* @returns see GMMR0RegisterSharedModule.
4316	* @param pVM Pointer to the shared VM structure.
4317	* @param idCpu VCPU id
4318	* @param pReq The request packet.
4319	*/
4320	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
4321	{
4322	/*
4323	* Validate input and pass it on.
4324	*/
4325	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4326	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4327	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4328
4329	/* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
4330	pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
4331	return VINF_SUCCESS;
4332	}
4333
4334
4335	/**
4336	* Unregisters a shared module for the VM
4337	*
4338	* @returns VBox status code.
4339	* @param pVM VM handle
4340	* @param idCpu VCPU id
4341	* @param pszModuleName Module name
4342	* @param pszVersion Module version
4343	* @param GCBaseAddr Module base address
4344	* @param cbModule Module size
4345	*/
4346	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
4347	{
4348	#ifdef VBOX_WITH_PAGE_SHARING
4349	/*
4350	* Validate input and get the basics.
4351	*/
4352	PGMM pGMM;
4353	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4354	PGVM pGVM;
4355	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4356	if (RT_FAILURE(rc))
4357	return rc;
4358
4359	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
4360
4361	/*
4362	* Take the semaphore and do some more validations.
4363	*/
4364	gmmR0MutexAcquire(pGMM);
4365	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4366	{
4367	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4368	if (pRecVM)
4369	{
4370	/* Remove reference to global shared module. */
4371	if (!pRecVM->fCollision)
4372	{
4373	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4374	Assert(pRec);
4375
4376	if (pRec) /* paranoia */
4377	{
4378	Assert(pRec->cUsers);
4379	pRec->cUsers--;
4380	if (pRec->cUsers == 0)
4381	{
4382	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
4383	for (unsigned i = 0; i < pRec->cRegions; i++)
4384	if (pRec->aRegions[i].paHCPhysPageID)
4385	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4386
4387	Assert(pRec->Core.Key == GCBaseAddr \|\| pRec->enmGuestOS == VBOXOSFAMILY_Windows64);
4388	Assert(pRec->cRegions == pRecVM->cRegions);
4389	#ifdef VBOX_STRICT
4390	for (unsigned i = 0; i < pRecVM->cRegions; i++)
4391	{
4392	Assert(pRecVM->aRegions[i].GCRegionAddr == pRec->aRegions[i].GCRegionAddr);
4393	Assert(pRecVM->aRegions[i].cbRegion == pRec->aRegions[i].cbRegion);
4394	}
4395	#endif
4396
4397	/* Remove from the tree and free memory. */
4398	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4399	RTMemFree(pRec);
4400	}
4401	}
4402	else
4403	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
4404	}
4405	else
4406	Assert(!pRecVM->pGlobalModule);
4407
4408	/* Remove from the tree and free memory. */
4409	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
4410	RTMemFree(pRecVM);
4411	}
4412	else
4413	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
4414
4415	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4416	}
4417	else
4418	rc = VERR_INTERNAL_ERROR_5;
4419
4420	gmmR0MutexRelease(pGMM);
4421	return rc;
4422	#else
4423	return VERR_NOT_IMPLEMENTED;
4424	#endif
4425	}
4426
4427
4428	/**
4429	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
4430	*
4431	* @returns see GMMR0UnregisterSharedModule.
4432	* @param pVM Pointer to the shared VM structure.
4433	* @param idCpu VCPU id
4434	* @param pReq The request packet.
4435	*/
4436	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
4437	{
4438	/*
4439	* Validate input and pass it on.
4440	*/
4441	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4442	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4443	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4444
4445	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
4446	}
4447
4448	#ifdef VBOX_WITH_PAGE_SHARING
4449
4450	/**
4451	* Increase the use count of a shared page, the page is known to exist and be valid and such.
4452	*
4453	* @param pGMM Pointer to the GMM instance.
4454	* @param pGVM Pointer to the GVM instance.
4455	* @param pPage The page structure.
4456	*/
4457	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
4458	{
4459	Assert(pGMM->cSharedPages > 0);
4460	Assert(pGMM->cAllocatedPages > 0);
4461
4462	pGMM->cDuplicatePages++;
4463
4464	pPage->Shared.cRefs++;
4465	pGVM->gmm.s.cSharedPages++;
4466	pGVM->gmm.s.Allocated.cBasePages++;
4467	}
4468
4469
4470	/**
4471	* Converts a private page to a shared page, the page is known to exist and be valid and such.
4472	*
4473	* @param pGMM Pointer to the GMM instance.
4474	* @param pGVM Pointer to the GVM instance.
4475	* @param HCPhys Host physical address
4476	* @param idPage The Page ID
4477	* @param pPage The page structure.
4478	*/
4479	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
4480	{
4481	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
4482	Assert(pChunk);
4483	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
4484	Assert(GMM_PAGE_IS_PRIVATE(pPage));
4485
4486	pChunk->cPrivate--;
4487	pChunk->cShared++;
4488
4489	pGMM->cSharedPages++;
4490
4491	pGVM->gmm.s.cSharedPages++;
4492	pGVM->gmm.s.cPrivatePages--;
4493
4494	/* Modify the page structure. */
4495	pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
4496	pPage->Shared.cRefs = 1;
4497	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
4498	}
4499
4500
4501	/**
4502	* Checks specified shared module range for changes
4503	*
4504	* Performs the following tasks:
4505	* - If a shared page is new, then it changes the GMM page type to shared and
4506	* returns it in the pPageDesc descriptor.
4507	* - If a shared page already exists, then it checks if the VM page is
4508	* identical and if so frees the VM page and returns the shared page in
4509	* pPageDesc descriptor.
4510	*
4511	* @remarks ASSUMES the caller has acquired the GMM semaphore!!
4512	*
4513	* @returns VBox status code.
4514	* @param pGMM Pointer to the GMM instance data.
4515	* @param pGVM Pointer to the GVM instance data.
4516	* @param pModule Module description
4517	* @param idxRegion Region index
4518	* @param idxPage Page index
4519	* @param paPageDesc Page descriptor
4520	*/
4521	GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned idxPage,
4522	PGMMSHAREDPAGEDESC pPageDesc)
4523	{
4524	int rc = VINF_SUCCESS;
4525	PGMM pGMM;
4526	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4527	unsigned cPages = pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT;
4528
4529	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
4530	AssertReturn(idxPage < cPages, VERR_INVALID_PARAMETER);
4531
4532	LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
4533
4534	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
4535	if (!pGlobalRegion->paHCPhysPageID)
4536	{
4537	/* First time; create a page descriptor array. */
4538	Log(("Allocate page descriptor array for %d pages\n", cPages));
4539	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
4540	if (!pGlobalRegion->paHCPhysPageID)
4541	{
4542	AssertFailed();
4543	rc = VERR_NO_MEMORY;
4544	goto end;
4545	}
4546	/* Invalidate all descriptors. */
4547	for (unsigned i = 0; i < cPages; i++)
4548	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
4549	}
4550
4551	/* We've seen this shared page for the first time? */
4552	if (pGlobalRegion->paHCPhysPageID[idxPage] == NIL_GMM_PAGEID)
4553	{
4554	new_shared_page:
4555	Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
4556
4557	/* Easy case: just change the internal page type. */
4558	PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->uHCPhysPageId);
4559	if (!pPage)
4560	{
4561	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #1 (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x)\n",
4562	pPageDesc->uHCPhysPageId, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage));
4563	AssertFailed();
4564	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4565	goto end;
4566	}
4567
4568	AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->HCPhys, (pPage->Private.pfn << 12)));
4569
4570	gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pPage);
4571
4572	/* Keep track of these references. */
4573	pGlobalRegion->paHCPhysPageID[idxPage] = pPageDesc->uHCPhysPageId;
4574	}
4575	else
4576	{
4577	uint8_t pbLocalPage, pbSharedPage;
4578	uint8_t *pbChunk;
4579	PGMMCHUNK pChunk;
4580
4581	Assert(pPageDesc->uHCPhysPageId != pGlobalRegion->paHCPhysPageID[idxPage]);
4582
4583	Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->uHCPhysPageId, pGlobalRegion->paHCPhysPageID[idxPage]));
4584
4585	/* Get the shared page source. */
4586	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[idxPage]);
4587	if (!pPage)
4588	{
4589	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #2 (idxRegion=%#x idxPage=%#x)\n",
4590	pPageDesc->uHCPhysPageId, idxRegion, idxPage));
4591	AssertFailed();
4592	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4593	goto end;
4594	}
4595	if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
4596	{
4597	/* Page was freed at some point; invalidate this entry. */
4598	/** @todo this isn't really bullet proof. */
4599	Log(("Old shared page was freed -> create a new one\n"));
4600	pGlobalRegion->paHCPhysPageID[idxPage] = NIL_GMM_PAGEID;
4601	goto new_shared_page; /* ugly goto */
4602	}
4603
4604	Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
4605
4606	/* Calculate the virtual address of the local page. */
4607	pChunk = gmmR0GetChunk(pGMM, pPageDesc->uHCPhysPageId >> GMM_CHUNKID_SHIFT);
4608	if (pChunk)
4609	{
4610	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4611	{
4612	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #3\n", pPageDesc->uHCPhysPageId));
4613	AssertFailed();
4614	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4615	goto end;
4616	}
4617	pbLocalPage = pbChunk + ((pPageDesc->uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4618	}
4619	else
4620	{
4621	Log(("GMMR0SharedModuleCheckPage: Invalid idPage=%#x #4\n", pPageDesc->uHCPhysPageId));
4622	AssertFailed();
4623	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
4624	goto end;
4625	}
4626
4627	/* Calculate the virtual address of the shared page. */
4628	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[idxPage] >> GMM_CHUNKID_SHIFT);
4629	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
4630
4631	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
4632	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4633	{
4634	Log(("Map chunk into process!\n"));
4635	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4636	if (rc != VINF_SUCCESS)
4637	{
4638	AssertRC(rc);
4639	goto end;
4640	}
4641	}
4642	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4643
4644	/** @todo write ASMMemComparePage. */
4645	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
4646	{
4647	Log(("Unexpected differences found between local and shared page; skip\n"));
4648	/* Signal to the caller that this one hasn't changed. */
4649	pPageDesc->uHCPhysPageId = NIL_GMM_PAGEID;
4650	goto end;
4651	}
4652
4653	/* Free the old local page. */
4654	GMMFREEPAGEDESC PageDesc;
4655
4656	PageDesc.idPage = pPageDesc->uHCPhysPageId;
4657	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
4658	AssertRCReturn(rc, rc);
4659
4660	gmmR0UseSharedPage(pGMM, pGVM, pPage);
4661
4662	/* Pass along the new physical address & page id. */
4663	pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
4664	pPageDesc->uHCPhysPageId = pGlobalRegion->paHCPhysPageID[idxPage];
4665	}
4666	end:
4667	return rc;
4668	}
4669
4670
4671	/**
4672	* RTAvlGCPtrDestroy callback.
4673	*
4674	* @returns 0 or VERR_INTERNAL_ERROR.
4675	* @param pNode The node to destroy.
4676	* @param pvGVM The GVM handle.
4677	*/
4678	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
4679	{
4680	PGVM pGVM = (PGVM)pvGVM;
4681	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
4682
4683	Assert(pRecVM->pGlobalModule \|\| pRecVM->fCollision);
4684	if (pRecVM->pGlobalModule)
4685	{
4686	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
4687	AssertPtr(pRec);
4688	Assert(pRec->cUsers);
4689
4690	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
4691	pRec->cUsers--;
4692	if (pRec->cUsers == 0)
4693	{
4694	for (uint32_t i = 0; i < pRec->cRegions; i++)
4695	if (pRec->aRegions[i].paHCPhysPageID)
4696	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
4697
4698	/* Remove from the tree and free memory. */
4699	PGMM pGMM;
4700	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4701	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
4702	RTMemFree(pRec);
4703	}
4704	}
4705	RTMemFree(pRecVM);
4706	return 0;
4707	}
4708
4709
4710	/**
4711	* Used by GMMR0CleanupVM to clean up shared modules.
4712	*
4713	* This is called without taking the GMM lock so that it can be yielded as
4714	* needed here.
4715	*
4716	* @param pGMM The GMM handle.
4717	* @param pGVM The global VM handle.
4718	*/
4719	static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
4720	{
4721	gmmR0MutexAcquire(pGMM);
4722	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
4723
4724	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4725
4726	gmmR0MutexRelease(pGMM);
4727	}
4728
4729	#endif /* VBOX_WITH_PAGE_SHARING */
4730
4731	/**
4732	* Removes all shared modules for the specified VM
4733	*
4734	* @returns VBox status code.
4735	* @param pVM VM handle
4736	* @param idCpu VCPU id
4737	*/
4738	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
4739	{
4740	#ifdef VBOX_WITH_PAGE_SHARING
4741	/*
4742	* Validate input and get the basics.
4743	*/
4744	PGMM pGMM;
4745	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4746	PGVM pGVM;
4747	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
4748	if (RT_FAILURE(rc))
4749	return rc;
4750
4751	/*
4752	* Take the semaphore and do some more validations.
4753	*/
4754	gmmR0MutexAcquire(pGMM);
4755	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4756	{
4757	Log(("GMMR0ResetSharedModules\n"));
4758	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
4759
4760	rc = VINF_SUCCESS;
4761	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4762	}
4763	else
4764	rc = VERR_INTERNAL_ERROR_5;
4765
4766	gmmR0MutexRelease(pGMM);
4767	return rc;
4768	#else
4769	return VERR_NOT_IMPLEMENTED;
4770	#endif
4771	}
4772
4773	#ifdef VBOX_WITH_PAGE_SHARING
4774
4775	typedef struct
4776	{
4777	PGVM pGVM;
4778	VMCPUID idCpu;
4779	int rc;
4780	} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4781
4782	/**
4783	* Tree enumeration callback for checking a shared module.
4784	*/
4785	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4786	{
4787	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4788	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4789	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4790
4791	if ( !pLocalModule->fCollision
4792	&& pGlobalModule)
4793	{
4794	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4795	pInfo->rc = PGMR0SharedModuleCheck(pInfo->pGVM->pVM, pInfo->pGVM, pInfo->idCpu, pGlobalModule, pLocalModule->cRegions, pLocalModule->aRegions);
4796	if (RT_FAILURE(pInfo->rc))
4797	return 1; /* stop enumeration. */
4798	}
4799	return 0;
4800	}
4801
4802	#endif /* VBOX_WITH_PAGE_SHARING */
4803	#ifdef DEBUG_sandervl
4804
4805	/**
4806	* Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4807	*
4808	* @returns VBox status code.
4809	* @param pVM VM handle
4810	*/
4811	GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4812	{
4813	/*
4814	* Validate input and get the basics.
4815	*/
4816	PGMM pGMM;
4817	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4818
4819	/*
4820	* Take the semaphore and do some more validations.
4821	*/
4822	gmmR0MutexAcquire(pGMM);
4823	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4824	rc = VERR_INTERNAL_ERROR_5;
4825	else
4826	rc = VINF_SUCCESS;
4827
4828	return rc;
4829	}
4830
4831	/**
4832	* Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4833	*
4834	* @returns VBox status code.
4835	* @param pVM VM handle
4836	*/
4837	GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4838	{
4839	/*
4840	* Validate input and get the basics.
4841	*/
4842	PGMM pGMM;
4843	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4844
4845	gmmR0MutexRelease(pGMM);
4846	return VINF_SUCCESS;
4847	}
4848
4849	#endif /* DEBUG_sandervl */
4850
4851	/**
4852	* Check all shared modules for the specified VM
4853	*
4854	* @returns VBox status code.
4855	* @param pVM VM handle
4856	* @param pVCpu VMCPU handle
4857	*/
4858	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4859	{
4860	#ifdef VBOX_WITH_PAGE_SHARING
4861	/*
4862	* Validate input and get the basics.
4863	*/
4864	PGMM pGMM;
4865	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4866	PGVM pGVM;
4867	int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4868	if (RT_FAILURE(rc))
4869	return rc;
4870
4871	# ifndef DEBUG_sandervl
4872	/*
4873	* Take the semaphore and do some more validations.
4874	*/
4875	gmmR0MutexAcquire(pGMM);
4876	# endif
4877	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4878	{
4879	GMMCHECKSHAREDMODULEINFO Info;
4880
4881	Log(("GMMR0CheckSharedModules\n"));
4882	Info.pGVM = pGVM;
4883	Info.idCpu = pVCpu->idCpu;
4884	Info.rc = VINF_SUCCESS;
4885
4886	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4887
4888	rc = Info.rc;
4889
4890	Log(("GMMR0CheckSharedModules done!\n"));
4891
4892	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4893	}
4894	else
4895	rc = VERR_INTERNAL_ERROR_5;
4896
4897	# ifndef DEBUG_sandervl
4898	gmmR0MutexRelease(pGMM);
4899	# endif
4900	return rc;
4901	#else
4902	return VERR_NOT_IMPLEMENTED;
4903	#endif
4904	}
4905
4906	#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
4907
4908	typedef struct
4909	{
4910	PGVM pGVM;
4911	PGMM pGMM;
4912	uint8_t *pSourcePage;
4913	bool fFoundDuplicate;
4914	} GMMFINDDUPPAGEINFO, *PGMMFINDDUPPAGEINFO;
4915
4916	/**
4917	* RTAvlU32DoWithAll callback.
4918	*
4919	* @returns 0
4920	* @param pNode The node to search.
4921	* @param pvInfo Pointer to the input parameters
4922	*/
4923	static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvInfo)
4924	{
4925	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
4926	PGMMFINDDUPPAGEINFO pInfo = (PGMMFINDDUPPAGEINFO)pvInfo;
4927	PGVM pGVM = pInfo->pGVM;
4928	PGMM pGMM = pInfo->pGMM;
4929	uint8_t *pbChunk;
4930
4931	/* Only take chunks not mapped into this VM process; not entirely correct. */
4932	if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4933	{
4934	int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/, (PRTR3PTR)&pbChunk);
4935	if (RT_SUCCESS(rc))
4936	{
4937	/*
4938	* Look for duplicate pages
4939	*/
4940	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
4941	while (iPage-- > 0)
4942	{
4943	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
4944	{
4945	uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
4946
4947	if (!memcmp(pInfo->pSourcePage, pbDestPage, PAGE_SIZE))
4948	{
4949	pInfo->fFoundDuplicate = true;
4950	break;
4951	}
4952	}
4953	}
4954	gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /fRelaxedSem/);
4955	}
4956	}
4957	return pInfo->fFoundDuplicate; /* (stops search if true) */
4958	}
4959
4960
4961	/**
4962	* Find a duplicate of the specified page in other active VMs
4963	*
4964	* @returns VBox status code.
4965	* @param pVM VM handle
4966	* @param pReq Request packet
4967	*/
4968	GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
4969	{
4970	/*
4971	* Validate input and pass it on.
4972	*/
4973	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
4974	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
4975	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
4976
4977	PGMM pGMM;
4978	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4979
4980	PGVM pGVM;
4981	int rc = GVMMR0ByVM(pVM, &pGVM);
4982	if (RT_FAILURE(rc))
4983	return rc;
4984
4985	/*
4986	* Take the semaphore and do some more validations.
4987	*/
4988	rc = gmmR0MutexAcquire(pGMM);
4989	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4990	{
4991	uint8_t *pbChunk;
4992	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
4993	if (pChunk)
4994	{
4995	if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
4996	{
4997	uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
4998	PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
4999	if (pPage)
5000	{
5001	GMMFINDDUPPAGEINFO Info;
5002	Info.pGVM = pGVM;
5003	Info.pGMM = pGMM;
5004	Info.pSourcePage = pbSourcePage;
5005	Info.fFoundDuplicate = false;
5006	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Info);
5007
5008	pReq->fDuplicate = Info.fFoundDuplicate;
5009	}
5010	else
5011	{
5012	AssertFailed();
5013	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
5014	}
5015	}
5016	else
5017	AssertFailed();
5018	}
5019	else
5020	AssertFailed();
5021	}
5022	else
5023	rc = VERR_INTERNAL_ERROR_5;
5024
5025	gmmR0MutexRelease(pGMM);
5026	return rc;
5027	}
5028
5029	#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
5030

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 37529

Download in other formats: