GMMR0.cpp@ 29719

Last change on this file since 29719 was 29639, checked in by vboxsync, 15 years ago
Missing stats adjustment
Property svn:eol-style set to `native` Property svn:keywords set to `Id`
File size: 143.6 KB

Line
1	/* $Id: GMMR0.cpp 29639 2010-05-18 14:18:40Z vboxsync $ */
2	/** @file
3	* GMM - Global Memory Manager.
4	*/
5
6	/*
7	* Copyright (C) 2007 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18
19	/** @page pg_gmm GMM - The Global Memory Manager
20	*
21	* As the name indicates, this component is responsible for global memory
22	* management. Currently only guest RAM is allocated from the GMM, but this
23	* may change to include shadow page tables and other bits later.
24	*
25	* Guest RAM is managed as individual pages, but allocated from the host OS
26	* in chunks for reasons of portability / efficiency. To minimize the memory
27	* footprint all tracking structure must be as small as possible without
28	* unnecessary performance penalties.
29	*
30	* The allocation chunks have a fixed size, which is defined at compile time
31	* by the #GMM_CHUNK_SIZE \#define.
32	*
33	* Each chunk is given a unique ID. Each page also has a unique ID. The
34	* relationship between the two IDs is:
35	* @code
36	* GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
37	* idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
38	* @endcode
39	* Where iPage is the index of the page within the chunk. This ID scheme
40	* permits for efficient chunk and page lookup, but it relies on the chunk size
41	* to be set at compile time. The chunks are organized in an AVL tree with their
42	* IDs being the keys.
43	*
44	* The physical address of each page in an allocation chunk is maintained by
45	* the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
46	* need to duplicate this information (it'll cost 8-bytes per page if we did).
47	*
48	* So what do we need to track per page? Most importantly we need to know
49	* which state the page is in:
50	* - Private - Allocated for (eventually) backing one particular VM page.
51	* - Shared - Readonly page that is used by one or more VMs and treated
52	* as COW by PGM.
53	* - Free - Not used by anyone.
54	*
55	* For the page replacement operations (sharing, defragmenting and freeing)
56	* to be somewhat efficient, private pages needs to be associated with a
57	* particular page in a particular VM.
58	*
59	* Tracking the usage of shared pages is impractical and expensive, so we'll
60	* settle for a reference counting system instead.
61	*
62	* Free pages will be chained on LIFOs
63	*
64	* On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
65	* systems a 32-bit bitfield will have to suffice because of address space
66	* limitations. The #GMMPAGE structure shows the details.
67	*
68	*
69	* @section sec_gmm_alloc_strat Page Allocation Strategy
70	*
71	* The strategy for allocating pages has to take fragmentation and shared
72	* pages into account, or we may end up with 2000 chunks with only
73	* a few pages in each. Shared pages cannot easily be reallocated because
74	* of the inaccurate usage accounting (see above). Private pages can be
75	* reallocated by a defragmentation thread in the same manner that sharing
76	* is done.
77	*
78	* The first approach is to manage the free pages in two sets depending on
79	* whether they are mainly for the allocation of shared or private pages.
80	* In the initial implementation there will be almost no possibility for
81	* mixing shared and private pages in the same chunk (only if we're really
82	* stressed on memory), but when we implement forking of VMs and have to
83	* deal with lots of COW pages it'll start getting kind of interesting.
84	*
85	* The sets are lists of chunks with approximately the same number of
86	* free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
87	* consists of 16 lists. So, the first list will contain the chunks with
88	* 1-7 free pages, the second covers 8-15, and so on. The chunks will be
89	* moved between the lists as pages are freed up or allocated.
90	*
91	*
92	* @section sec_gmm_costs Costs
93	*
94	* The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
95	* entails. In addition there is the chunk cost of approximately
96	* (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
97	*
98	* On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows
99	* and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
100	* The cost on Linux is identical, but here it's because of sizeof(struct page *).
101	*
102	*
103	* @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
104	*
105	* In legacy mode the page source is locked user pages and not
106	* #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
107	* by the VM that locked it. We will make no attempt at implementing
108	* page sharing on these systems, just do enough to make it all work.
109	*
110	*
111	* @subsection sub_gmm_locking Serializing
112	*
113	* One simple fast mutex will be employed in the initial implementation, not
114	* two as mentioned in @ref subsec_pgmPhys_Serializing.
115	*
116	* @see @ref subsec_pgmPhys_Serializing
117	*
118	*
119	* @section sec_gmm_overcommit Memory Over-Commitment Management
120	*
121	* The GVM will have to do the system wide memory over-commitment
122	* management. My current ideas are:
123	* - Per VM oc policy that indicates how much to initially commit
124	* to it and what to do in a out-of-memory situation.
125	* - Prevent overtaxing the host.
126	*
127	* There are some challenges here, the main ones are configurability and
128	* security. Should we for instance permit anyone to request 100% memory
129	* commitment? Who should be allowed to do runtime adjustments of the
130	* config. And how to prevent these settings from being lost when the last
131	* VM process exits? The solution is probably to have an optional root
132	* daemon that will keep VMMR0.r0 in memory and enable the security measures.
133	*
134	*
135	*
136	* @section sec_gmm_numa NUMA
137	*
138	* NUMA considerations will be designed and implemented a bit later.
139	*
140	* The preliminary guess is that we will have to try to allocate memory as
141	* close as possible to the CPUs the VM is executed on (EMT and additional CPU
142	* threads). Which means it's mostly about allocation and sharing policies.
143	* Both the scheduler and allocator interface will have to supply some NUMA info
144	* and we'll need to have a way to calc access costs.
145	*
146	*/
147
148
149	/*******************************************************************************
150	* Header Files *
151	*******************************************************************************/
152	#define LOG_GROUP LOG_GROUP_GMM
153	#include <VBox/vm.h>
154	#include <VBox/gmm.h>
155	#include "GMMR0Internal.h"
156	#include <VBox/gvm.h>
157	#include <VBox/pgm.h>
158	#include <VBox/log.h>
159	#include <VBox/param.h>
160	#include <VBox/err.h>
161	#include <iprt/asm.h>
162	#include <iprt/avl.h>
163	#include <iprt/mem.h>
164	#include <iprt/memobj.h>
165	#include <iprt/semaphore.h>
166	#include <iprt/string.h>
167
168
169	/*******************************************************************************
170	* Structures and Typedefs *
171	*******************************************************************************/
/** Pointer to a set of free chunks (the structure is defined further down). */
typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;

/** Pointer to a GMM allocation chunk (the structure is defined further down). */
typedef struct GMMCHUNK *PGMMCHUNK;
177
178	/**
179	* The per-page tracking structure employed by the GMM.
180	*
181	* On 32-bit hosts we'll some trickery is necessary to compress all
182	* the information into 32-bits. When the fSharedFree member is set,
183	* the 30th bit decides whether it's a free page or not.
184	*
185	* Because of the different layout on 32-bit and 64-bit hosts, macros
186	* are used to get and set some of the data.
187	*/
188	typedef union GMMPAGE
189	{
190	#if HC_ARCH_BITS == 64
191	/** Unsigned integer view. */
192	uint64_t u;
193
194	/** The common view. */
195	struct GMMPAGECOMMON
196	{
197	uint32_t uStuff1 : 32;
198	uint32_t uStuff2 : 30;
199	/** The page state. */
200	uint32_t u2State : 2;
201	} Common;
202
203	/** The view of a private page. */
204	struct GMMPAGEPRIVATE
205	{
206	/** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
207	uint32_t pfn;
208	/** The GVM handle. (64K VMs) */
209	uint32_t hGVM : 16;
210	/** Reserved. */
211	uint32_t u16Reserved : 14;
212	/** The page state. */
213	uint32_t u2State : 2;
214	} Private;
215
216	/** The view of a shared page. */
217	struct GMMPAGESHARED
218	{
219	/** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
220	uint32_t pfn;
221	/** The reference count (64K VMs). */
222	uint32_t cRefs : 16;
223	/** Reserved. Checksum or something? Two hGVMs for forking? */
224	uint32_t u14Reserved : 14;
225	/** The page state. */
226	uint32_t u2State : 2;
227	} Shared;
228
229	/** The view of a free page. */
230	struct GMMPAGEFREE
231	{
232	/** The index of the next page in the free list. UINT16_MAX is NIL. */
233	uint16_t iNext;
234	/** Reserved. Checksum or something? */
235	uint16_t u16Reserved0;
236	/** Reserved. Checksum or something? */
237	uint32_t u30Reserved1 : 30;
238	/** The page state. */
239	uint32_t u2State : 2;
240	} Free;
241
242	#else /* 32-bit */
243	/** Unsigned integer view. */
244	uint32_t u;
245
246	/** The common view. */
247	struct GMMPAGECOMMON
248	{
249	uint32_t uStuff : 30;
250	/** The page state. */
251	uint32_t u2State : 2;
252	} Common;
253
254	/** The view of a private page. */
255	struct GMMPAGEPRIVATE
256	{
257	/** The guest page frame number. (Max addressable: 2 ^ 36) */
258	uint32_t pfn : 24;
259	/** The GVM handle. (127 VMs) */
260	uint32_t hGVM : 7;
261	/** The top page state bit, MBZ. */
262	uint32_t fZero : 1;
263	} Private;
264
265	/** The view of a shared page. */
266	struct GMMPAGESHARED
267	{
268	/** The reference count. */
269	uint32_t cRefs : 30;
270	/** The page state. */
271	uint32_t u2State : 2;
272	} Shared;
273
274	/** The view of a free page. */
275	struct GMMPAGEFREE
276	{
277	/** The index of the next page in the free list. UINT16_MAX is NIL. */
278	uint32_t iNext : 16;
279	/** Reserved. Checksum or something? */
280	uint32_t u14Reserved : 14;
281	/** The page state. */
282	uint32_t u2State : 2;
283	} Free;
284	#endif
285	} GMMPAGE;
286	AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
287	/** Pointer to a GMMPAGE. */
288	typedef GMMPAGE *PGMMPAGE;
289
290
/** @name The Page States.
 * The values kept in the two-bit u2State member of the GMMPAGE views.
 * @{ */
/** A private page. */
#define GMM_PAGE_STATE_PRIVATE          0
/** A private page - alternative value used on the 32-bit implementation.
 * This will never be used on 64-bit hosts. */
#define GMM_PAGE_STATE_PRIVATE_32       1
/** A shared page. */
#define GMM_PAGE_STATE_SHARED           2
/** A free page. */
#define GMM_PAGE_STATE_FREE             3
/** @} */
303
304
/** @def GMM_PAGE_IS_PRIVATE
 * Checks whether a page is private.
 *
 * On 64-bit hosts this compares the state field; on 32-bit hosts the private
 * view has no state field, so the must-be-zero top bit is tested instead.
 *
 * @returns true if private, false if not.
 * @param   pPage       The GMM page.
 */
#if HC_ARCH_BITS == 64
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
#else
# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
#endif

/** @def GMM_PAGE_IS_SHARED
 * Checks whether a page is shared.
 *
 * @returns true if shared, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_SHARED(pPage)   ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )

/** @def GMM_PAGE_IS_FREE
 * Checks whether a page is free.
 *
 * @returns true if free, false if not.
 * @param   pPage       The GMM page.
 */
#define GMM_PAGE_IS_FREE(pPage)     ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
329
330	/** @def GMM_PAGE_PFN_LAST
331	* The last valid guest pfn range.
332	* @remark Some of the values outside the range has special meaning,
333	* see GMM_PAGE_PFN_UNSHAREABLE.
334	*/
335	#if HC_ARCH_BITS == 64
336	# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
337	#else
338	# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
339	#endif
340	AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
341
342	/** @def GMM_PAGE_PFN_UNSHAREABLE
343	* Indicates that this page isn't used for normal guest memory and thus isn't shareable.
344	*/
345	#if HC_ARCH_BITS == 64
346	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
347	#else
348	# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
349	#endif
350	AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
351
352
353	/**
354	* A GMM allocation chunk ring-3 mapping record.
355	*
356	* This should really be associated with a session and not a VM, but
357	* it's simpler to associated with a VM and cleanup with the VM object
358	* is destroyed.
359	*/
360	typedef struct GMMCHUNKMAP
361	{
362	/** The mapping object. */
363	RTR0MEMOBJ MapObj;
364	/** The VM owning the mapping. */
365	PGVM pGVM;
366	} GMMCHUNKMAP;
367	/** Pointer to a GMM allocation chunk mapping. */
368	typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
369
/**
 * The kind of host memory backing an allocation chunk.
 */
typedef enum GMMCHUNKTYPE
{
    /** Invalid zero value. */
    GMMCHUNKTYPE_INVALID        = 0,
    /** 4 KB pages. */
    GMMCHUNKTYPE_NON_CONTINUOUS = 1,
    /** One 2 MB continuous physical range. */
    GMMCHUNKTYPE_CONTINUOUS     = 2,
    /** Largest 32-bit signed value, the usual enum-width filler. */
    GMMCHUNKTYPE_32BIT_HACK     = 0x7fffffff
} GMMCHUNKTYPE;
377
378
379	/**
380	* A GMM allocation chunk.
381	*/
382	typedef struct GMMCHUNK
383	{
384	/** The AVL node core.
385	* The Key is the chunk ID. */
386	AVLU32NODECORE Core;
387	/** The memory object.
388	* Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
389	* what the host can dish up with. */
390	RTR0MEMOBJ MemObj;
391	/** Pointer to the next chunk in the free list. */
392	PGMMCHUNK pFreeNext;
393	/** Pointer to the previous chunk in the free list. */
394	PGMMCHUNK pFreePrev;
395	/** Pointer to the free set this chunk belongs to. NULL for
396	* chunks with no free pages. */
397	PGMMCHUNKFREESET pSet;
398	/** Pointer to an array of mappings. */
399	PGMMCHUNKMAP paMappings;
400	/** The number of mappings. */
401	uint16_t cMappings;
402	/** The head of the list of free pages. UINT16_MAX is the NIL value. */
403	uint16_t iFreeHead;
404	/** The number of free pages. */
405	uint16_t cFree;
406	/** The GVM handle of the VM that first allocated pages from this chunk, this
407	* is used as a preference when there are several chunks to choose from.
408	* When in bound memory mode this isn't a preference any longer. */
409	uint16_t hGVM;
410	/** The number of private pages. */
411	uint16_t cPrivate;
412	/** The number of shared pages. */
413	uint16_t cShared;
414	/** Chunk type */
415	GMMCHUNKTYPE enmType;
416	/** The pages. */
417	GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
418	} GMMCHUNK;
419
420
421	/**
422	* An allocation chunk TLB entry.
423	*/
424	typedef struct GMMCHUNKTLBE
425	{
426	/** The chunk id. */
427	uint32_t idChunk;
428	/** Pointer to the chunk. */
429	PGMMCHUNK pChunk;
430	} GMMCHUNKTLBE;
431	/** Pointer to an allocation chunk TLB entry. */
432	typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
433
434
/** The number of entries in the allocation chunk TLB.
 * GMM_CHUNKTLB_IDX masks with (entries - 1), so this has to stay a power of two. */
#define GMM_CHUNKTLB_ENTRIES        32
/** Gets the TLB entry index for the given Chunk ID (the low bits of the ID). */
#define GMM_CHUNKTLB_IDX(idChunk)   ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
439
440	/**
441	* An allocation chunk TLB.
442	*/
443	typedef struct GMMCHUNKTLB
444	{
445	/** The TLB entries. */
446	GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
447	} GMMCHUNKTLB;
448	/** Pointer to an allocation chunk TLB. */
449	typedef GMMCHUNKTLB *PGMMCHUNKTLB;
450
451
/** The GMMCHUNK::cFree shift count. */
#define GMM_CHUNK_FREE_SET_SHIFT    4
/** The GMMCHUNK::cFree mask for use when considering relinking a chunk. */
#define GMM_CHUNK_FREE_SET_MASK     15
/** The number of lists in a set: one per 2^GMM_CHUNK_FREE_SET_SHIFT pages of a chunk. */
#define GMM_CHUNK_FREE_SET_LISTS    (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
458
459	/**
460	* A set of free chunks.
461	*/
462	typedef struct GMMCHUNKFREESET
463	{
464	/** The number of free pages in the set. */
465	uint64_t cFreePages;
466	/** Chunks ordered by increasing number of free pages. */
467	PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_LISTS];
468	} GMMCHUNKFREESET;
469
470
471	/**
472	* The GMM instance data.
473	*/
474	typedef struct GMM
475	{
476	/** Magic / eye catcher. GMM_MAGIC */
477	uint32_t u32Magic;
478	/** The fast mutex protecting the GMM.
479	* More fine grained locking can be implemented later if necessary. */
480	RTSEMFASTMUTEX Mtx;
481	/** The chunk tree. */
482	PAVLU32NODECORE pChunks;
483	/** The chunk TLB. */
484	GMMCHUNKTLB ChunkTLB;
485	/** The private free set. */
486	GMMCHUNKFREESET Private;
487	/** The shared free set. */
488	GMMCHUNKFREESET Shared;
489
490	/** Shared module tree (global). */
491	/** todo seperate trees for distinctly different guest OSes. */
492	PAVLGCPTRNODECORE pGlobalSharedModuleTree;
493
494	/** The maximum number of pages we're allowed to allocate.
495	* @gcfgm 64-bit GMM/MaxPages Direct.
496	* @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
497	uint64_t cMaxPages;
498	/** The number of pages that has been reserved.
499	* The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
500	uint64_t cReservedPages;
501	/** The number of pages that we have over-committed in reservations. */
502	uint64_t cOverCommittedPages;
503	/** The number of actually allocated (committed if you like) pages. */
504	uint64_t cAllocatedPages;
505	/** The number of pages that are shared. A subset of cAllocatedPages. */
506	uint64_t cSharedPages;
507	/** The number of pages that are actually shared between VMs. */
508	uint64_t cDuplicatePages;
509	/** The number of pages that are shared that has been left behind by
510	* VMs not doing proper cleanups. */
511	uint64_t cLeftBehindSharedPages;
512	/** The number of allocation chunks.
513	* (The number of pages we've allocated from the host can be derived from this.) */
514	uint32_t cChunks;
515	/** The number of current ballooned pages. */
516	uint64_t cBalloonedPages;
517
518	/** The legacy allocation mode indicator.
519	* This is determined at initialization time. */
520	bool fLegacyAllocationMode;
521	/** The bound memory mode indicator.
522	* When set, the memory will be bound to a specific VM and never
523	* shared. This is always set if fLegacyAllocationMode is set.
524	* (Also determined at initialization time.) */
525	bool fBoundMemoryMode;
526	/** The number of registered VMs. */
527	uint16_t cRegisteredVMs;
528
529	/** The previous allocated Chunk ID.
530	* Used as a hint to avoid scanning the whole bitmap. */
531	uint32_t idChunkPrev;
532	/** Chunk ID allocation bitmap.
533	* Bits of allocated IDs are set, free ones are clear.
534	* The NIL id (0) is marked allocated. */
535	uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
536	} GMM;
537	/** Pointer to the GMM instance. */
538	typedef GMM *PGMM;
539
/** The value of GMM::u32Magic (Katsuhiro Otomo). */
#define GMM_MAGIC       0x19540414
542
543
544	/*******************************************************************************
545	* Global Variables *
546	*******************************************************************************/
547	/** Pointer to the GMM instance data. */
548	static PGMM g_pGMM = NULL;
549
550	/** Macro for obtaining and validating the g_pGMM pointer.
551	* On failure it will return from the invoking function with the specified return value.
552	*
553	* @param pGMM The name of the pGMM variable.
554	* @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
555	* VBox status codes.
556	*/
557	#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
558	do { \
559	(pGMM) = g_pGMM; \
560	AssertPtrReturn((pGMM), (rc)); \
561	AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
562	} while (0)
563
564	/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
565	* On failure it will return from the invoking function.
566	*
567	* @param pGMM The name of the pGMM variable.
568	*/
569	#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
570	do { \
571	(pGMM) = g_pGMM; \
572	AssertPtrReturnVoid((pGMM)); \
573	AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
574	} while (0)
575
576
/** @def GMM_CHECK_SANITY_UPON_ENTERING
 * Checks the sanity of the GMM instance data before making changes.
 *
 * This macro is a stub by default and must be enabled manually in the code
 * (by changing the "&& 0" below in a VBOX_STRICT build).
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM)   (true)
#endif

/** @def GMM_CHECK_SANITY_UPON_LEAVING
 * Checks the sanity of the GMM instance data after making changes.
 *
 * This macro is a stub by default and must be enabled manually in the code
 * (by changing the "&& 0" below in a VBOX_STRICT build).
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM)    (true)
#endif

/** @def GMM_CHECK_SANITY_IN_LOOPS
 * Checks the sanity of the GMM instance in the allocation loops.
 *
 * This macro is a stub by default and must be enabled manually in the code
 * (by changing the "&& 0" below in a VBOX_STRICT build).
 *
 * @returns true if sane, false if not.
 * @param   pGMM    The name of the pGMM variable.
 */
#if defined(VBOX_STRICT) && 0
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
#else
# define GMM_CHECK_SANITY_IN_LOOPS(pGMM)        (true)
#endif
618
619
620	/*******************************************************************************
621	* Internal Functions *
622	*******************************************************************************/
623	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
624	static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
625	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM);
626	/static/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
627	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
628	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
629	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
630	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
631	static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
632	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
633
634
635
636	/**
637	* Initializes the GMM component.
638	*
639	* This is called when the VMMR0.r0 module is loaded and protected by the
640	* loader semaphore.
641	*
642	* @returns VBox status code.
643	*/
644	GMMR0DECL(int) GMMR0Init(void)
645	{
646	LogFlow(("GMMInit:\n"));
647
648	/*
649	* Allocate the instance data and the lock(s).
650	*/
651	PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
652	if (!pGMM)
653	return VERR_NO_MEMORY;
654	pGMM->u32Magic = GMM_MAGIC;
655	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
656	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
657	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
658
659	int rc = RTSemFastMutexCreate(&pGMM->Mtx);
660	if (RT_SUCCESS(rc))
661	{
662	/*
663	* Check and see if RTR0MemObjAllocPhysNC works.
664	*/
665	#if 0 /* later, see #3170. */
666	RTR0MEMOBJ MemObj;
667	rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
668	if (RT_SUCCESS(rc))
669	{
670	rc = RTR0MemObjFree(MemObj, true);
671	AssertRC(rc);
672	}
673	else if (rc == VERR_NOT_SUPPORTED)
674	pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
675	else
676	SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
677	#else
678	# if defined(RT_OS_WINDOWS) \|\| defined(RT_OS_SOLARIS) \|\| defined(RT_OS_LINUX) \|\| defined(RT_OS_FREEBSD)
679	pGMM->fLegacyAllocationMode = false;
680	# if ARCH_BITS == 32
681	/* Don't reuse possibly partial chunks because of the virtual address space limitation. */
682	pGMM->fBoundMemoryMode = true;
683	# else
684	pGMM->fBoundMemoryMode = false;
685	# endif
686	# else
687	pGMM->fLegacyAllocationMode = true;
688	pGMM->fBoundMemoryMode = true;
689	# endif
690	#endif
691
692	/*
693	* Query system page count and guess a reasonable cMaxPages value.
694	*/
695	pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
696
697	g_pGMM = pGMM;
698	LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
699	return VINF_SUCCESS;
700	}
701
702	RTMemFree(pGMM);
703	SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
704	return rc;
705	}
706
707
708	/**
709	* Terminates the GMM component.
710	*/
711	GMMR0DECL(void) GMMR0Term(void)
712	{
713	LogFlow(("GMMTerm:\n"));
714
715	/*
716	* Take care / be paranoid...
717	*/
718	PGMM pGMM = g_pGMM;
719	if (!VALID_PTR(pGMM))
720	return;
721	if (pGMM->u32Magic != GMM_MAGIC)
722	{
723	SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
724	return;
725	}
726
727	/*
728	* Undo what init did and free all the resources we've acquired.
729	*/
730	/* Destroy the fundamentals. */
731	g_pGMM = NULL;
732	pGMM->u32Magic++;
733	RTSemFastMutexDestroy(pGMM->Mtx);
734	pGMM->Mtx = NIL_RTSEMFASTMUTEX;
735
736	/* free any chunks still hanging around. */
737	RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
738
739	/* finally the instance data itself. */
740	RTMemFree(pGMM);
741	LogFlow(("GMMTerm: done\n"));
742	}
743
744
745	/**
746	* RTAvlU32Destroy callback.
747	*
748	* @returns 0
749	* @param pNode The node to destroy.
750	* @param pvGMM The GMM handle.
751	*/
752	static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
753	{
754	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
755
756	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
757	SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
758	pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappings);
759
760	int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
761	if (RT_FAILURE(rc))
762	{
763	SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
764	pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
765	AssertRC(rc);
766	}
767	pChunk->MemObj = NIL_RTR0MEMOBJ;
768
769	RTMemFree(pChunk->paMappings);
770	pChunk->paMappings = NULL;
771
772	RTMemFree(pChunk);
773	NOREF(pvGMM);
774	return 0;
775	}
776
777
778	/**
779	* Initializes the per-VM data for the GMM.
780	*
781	* This is called from within the GVMM lock (from GVMMR0CreateVM)
782	* and should only initialize the data members so GMMR0CleanupVM
783	* can deal with them. We reserve no memory or anything here,
784	* that's done later in GMMR0InitVM.
785	*
786	* @param pGVM Pointer to the Global VM structure.
787	*/
788	GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
789	{
790	AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
791
792	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
793	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
794	pGVM->gmm.s.fMayAllocate = false;
795	}
796
797
798	/**
799	* Cleans up when a VM is terminating.
800	*
801	* @param pGVM Pointer to the Global VM structure.
802	*/
803	GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
804	{
805	LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
806
807	PGMM pGMM;
808	GMM_GET_VALID_INSTANCE_VOID(pGMM);
809
810	int rc = RTSemFastMutexRequest(pGMM->Mtx);
811	AssertRC(rc);
812	GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
813
814	#ifdef VBOX_WITH_PAGE_SHARING
815	/* Clean up all registered shared modules. */
816	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
817	#endif
818
819	/*
820	* The policy is 'INVALID' until the initial reservation
821	* request has been serviced.
822	*/
823	if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
824	&& pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
825	{
826	/*
827	* If it's the last VM around, we can skip walking all the chunk looking
828	* for the pages owned by this VM and instead flush the whole shebang.
829	*
830	* This takes care of the eventuality that a VM has left shared page
831	* references behind (shouldn't happen of course, but you never know).
832	*/
833	Assert(pGMM->cRegisteredVMs);
834	pGMM->cRegisteredVMs--;
835	#if 0 /* disabled so it won't hide bugs. */
836	if (!pGMM->cRegisteredVMs)
837	{
838	RTAvlU32Destroy(&pGMM->pChunks, gmmR0CleanupVMDestroyChunk, pGMM);
839
840	for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
841	{
842	pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
843	pGMM->ChunkTLB.aEntries[i].pChunk = NULL;
844	}
845
846	memset(&pGMM->Private, 0, sizeof(pGMM->Private));
847	memset(&pGMM->Shared, 0, sizeof(pGMM->Shared));
848
849	memset(&pGMM->bmChunkId[0], 0, sizeof(pGMM->bmChunkId));
850	ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
851
852	pGMM->cReservedPages = 0;
853	pGMM->cOverCommittedPages = 0;
854	pGMM->cAllocatedPages = 0;
855	pGMM->cSharedPages = 0;
856	pGMM->cDuplicatePages = 0;
857	pGMM->cLeftBehindSharedPages = 0;
858	pGMM->cChunks = 0;
859	pGMM->cBalloonedPages = 0;
860	}
861	else
862	#endif
863	{
864	/*
865	* Walk the entire pool looking for pages that belong to this VM
866	* and left over mappings. (This'll only catch private pages, shared
867	* pages will be 'left behind'.)
868	*/
869	uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
870	RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
871	if (pGVM->gmm.s.cPrivatePages)
872	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
873	pGMM->cAllocatedPages -= cPrivatePages;
874
875	/* free empty chunks. */
876	if (cPrivatePages)
877	{
878	PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
879	while (pCur)
880	{
881	PGMMCHUNK pNext = pCur->pFreeNext;
882	if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
883	&& ( !pGMM->fBoundMemoryMode
884	\|\| pCur->hGVM == pGVM->hSelf))
885	gmmR0FreeChunk(pGMM, pGVM, pCur);
886	pCur = pNext;
887	}
888	}
889
890	/* account for shared pages that weren't freed. */
891	if (pGVM->gmm.s.cSharedPages)
892	{
893	Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
894	SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
895	pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
896	}
897
898	/*
899	* Update the over-commitment management statistics.
900	*/
901	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
902	+ pGVM->gmm.s.Reserved.cFixedPages
903	+ pGVM->gmm.s.Reserved.cShadowPages;
904	switch (pGVM->gmm.s.enmPolicy)
905	{
906	case GMMOCPOLICY_NO_OC:
907	break;
908	default:
909	/** @todo Update GMM->cOverCommittedPages */
910	break;
911	}
912	}
913	}
914
915	/* zap the GVM data. */
916	pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
917	pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
918	pGVM->gmm.s.fMayAllocate = false;
919
920	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
921	RTSemFastMutexRelease(pGMM->Mtx);
922
923	LogFlow(("GMMR0CleanupVM: returns\n"));
924	}
925
926
927	/**
928	* RTAvlU32DoWithAll callback.
929	*
930	* @returns 0
931	* @param pNode The node to search.
932	* @param pvGVM Pointer to the shared VM structure.
933	*/
934	static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
935	{
936	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
937	PGVM pGVM = (PGVM)pvGVM;
938
939	/*
940	* Look for pages belonging to the VM.
941	* (Perform some internal checks while we're scanning.)
942	*/
943	#ifndef VBOX_STRICT
944	if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
945	#endif
946	{
947	unsigned cPrivate = 0;
948	unsigned cShared = 0;
949	unsigned cFree = 0;
950
951	gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
952
953	uint16_t hGVM = pGVM->hSelf;
954	unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
955	while (iPage-- > 0)
956	if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
957	{
958	if (pChunk->aPages[iPage].Private.hGVM == hGVM)
959	{
960	/*
961	* Free the page.
962	*
963	* The reason for not using gmmR0FreePrivatePage here is that we
964	* must not cause the chunk to be freed from under us - we're in
965	* an AVL tree walk here.
966	*/
967	pChunk->aPages[iPage].u = 0;
968	pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
969	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
970	pChunk->iFreeHead = iPage;
971	pChunk->cPrivate--;
972	pChunk->cFree++;
973	pGVM->gmm.s.cPrivatePages--;
974	cFree++;
975	}
976	else
977	cPrivate++;
978	}
979	else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
980	cFree++;
981	else
982	cShared++;
983
984	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
985
986	/*
987	* Did it add up?
988	*/
989	if (RT_UNLIKELY( pChunk->cFree != cFree
990	\|\| pChunk->cPrivate != cPrivate
991	\|\| pChunk->cShared != cShared))
992	{
993	SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
994	pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
995	pChunk->cFree = cFree;
996	pChunk->cPrivate = cPrivate;
997	pChunk->cShared = cShared;
998	}
999	}
1000
1001	/*
1002	* Look for the mapping belonging to the terminating VM.
1003	*/
1004	for (unsigned i = 0; i < pChunk->cMappings; i++)
1005	if (pChunk->paMappings[i].pGVM == pGVM)
1006	{
1007	RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
1008
1009	pChunk->cMappings--;
1010	if (i < pChunk->cMappings)
1011	pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
1012	pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
1013	pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
1014
1015	int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
1016	if (RT_FAILURE(rc))
1017	{
1018	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
1019	pChunk, pChunk->Core.Key, i, MemObj, rc);
1020	AssertRC(rc);
1021	}
1022	break;
1023	}
1024
1025	/*
1026	* If not in bound memory mode, we should reset the hGVM field
1027	* if it has our handle in it.
1028	*/
1029	if (pChunk->hGVM == pGVM->hSelf)
1030	{
1031	if (!g_pGMM->fBoundMemoryMode)
1032	pChunk->hGVM = NIL_GVM_HANDLE;
1033	else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1034	{
1035	SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
1036	pChunk, pChunk->Core.Key, pChunk->cFree);
1037	AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
1038
1039	gmmR0UnlinkChunk(pChunk);
1040	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1041	gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
1042	}
1043	}
1044
1045	return 0;
1046	}
1047
1048
1049	/**
1050	* RTAvlU32Destroy callback for GMMR0CleanupVM.
1051	*
1052	* @returns 0
1053	* @param pNode The node (allocation chunk) to destroy.
1054	* @param pvGVM Pointer to the shared VM structure.
1055	*/
1056	/static/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM)
1057	{
1058	PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
1059	PGVM pGVM = (PGVM)pvGVM;
1060
1061	for (unsigned i = 0; i < pChunk->cMappings; i++)
1062	{
1063	if (pChunk->paMappings[i].pGVM != pGVM)
1064	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: pGVM=%p exepcted %p\n", pChunk,
1065	pChunk->Core.Key, i, pChunk->paMappings[i].pGVM, pGVM);
1066	int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
1067	if (RT_FAILURE(rc))
1068	{
1069	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n", pChunk,
1070	pChunk->Core.Key, i, pChunk->paMappings[i].MapObj, rc);
1071	AssertRC(rc);
1072	}
1073	}
1074
1075	int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
1076	if (RT_FAILURE(rc))
1077	{
1078	SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
1079	pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
1080	AssertRC(rc);
1081	}
1082	pChunk->MemObj = NIL_RTR0MEMOBJ;
1083
1084	RTMemFree(pChunk->paMappings);
1085	pChunk->paMappings = NULL;
1086
1087	RTMemFree(pChunk);
1088	return 0;
1089	}
1090
1091
1092	/**
1093	* The initial resource reservations.
1094	*
1095	* This will make memory reservations according to policy and priority. If there aren't
1096	* sufficient resources available to sustain the VM this function will fail and all
1097	* future allocations requests will fail as well.
1098	*
1099	* These are just the initial reservations made very very early during the VM creation
1100	* process and will be adjusted later in the GMMR0UpdateReservation call after the
1101	* ring-3 init has completed.
1102	*
1103	* @returns VBox status code.
1104	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1105	* @retval VERR_GMM_
1106	*
1107	* @param pVM Pointer to the shared VM structure.
1108	* @param idCpu VCPU id
1109	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1110	* This does not include MMIO2 and similar.
1111	* @param cShadowPages The number of pages that may be allocated for shadow pageing structures.
1112	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1113	* hyper heap, MMIO2 and similar.
1114	* @param enmPolicy The OC policy to use on this VM.
1115	* @param enmPriority The priority in an out-of-memory situation.
1116	*
1117	* @thread The creator thread / EMT.
1118	*/
1119	GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1120	GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1121	{
1122	LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1123	pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1124
1125	/*
1126	* Validate, get basics and take the semaphore.
1127	*/
1128	PGMM pGMM;
1129	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1130	PGVM pGVM;
1131	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1132	if (RT_FAILURE(rc))
1133	return rc;
1134
1135	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1136	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1137	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1138	AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1139	AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1140
1141	rc = RTSemFastMutexRequest(pGMM->Mtx);
1142	AssertRC(rc);
1143	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1144	{
1145	if ( !pGVM->gmm.s.Reserved.cBasePages
1146	&& !pGVM->gmm.s.Reserved.cFixedPages
1147	&& !pGVM->gmm.s.Reserved.cShadowPages)
1148	{
1149	/*
1150	* Check if we can accomodate this.
1151	*/
1152	/* ... later ... */
1153	if (RT_SUCCESS(rc))
1154	{
1155	/*
1156	* Update the records.
1157	*/
1158	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1159	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1160	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1161	pGVM->gmm.s.enmPolicy = enmPolicy;
1162	pGVM->gmm.s.enmPriority = enmPriority;
1163	pGVM->gmm.s.fMayAllocate = true;
1164
1165	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1166	pGMM->cRegisteredVMs++;
1167	}
1168	}
1169	else
1170	rc = VERR_WRONG_ORDER;
1171	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1172	}
1173	else
1174	rc = VERR_INTERNAL_ERROR_5;
1175	RTSemFastMutexRelease(pGMM->Mtx);
1176	LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1177	return rc;
1178	}
1179
1180
1181	/**
1182	* VMMR0 request wrapper for GMMR0InitialReservation.
1183	*
1184	* @returns see GMMR0InitialReservation.
1185	* @param pVM Pointer to the shared VM structure.
1186	* @param idCpu VCPU id
1187	* @param pReq The request packet.
1188	*/
1189	GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
1190	{
1191	/*
1192	* Validate input and pass it on.
1193	*/
1194	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1195	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1196	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1197
1198	return GMMR0InitialReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1199	}
1200
1201
1202	/**
1203	* This updates the memory reservation with the additional MMIO2 and ROM pages.
1204	*
1205	* @returns VBox status code.
1206	* @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1207	*
1208	* @param pVM Pointer to the shared VM structure.
1209	* @param idCpu VCPU id
1210	* @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1211	* This does not include MMIO2 and similar.
1212	* @param cShadowPages The number of pages that may be allocated for shadow pageing structures.
1213	* @param cFixedPages The number of pages that may be allocated for fixed objects like the
1214	* hyper heap, MMIO2 and similar.
1215	*
1216	* @thread EMT.
1217	*/
1218	GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1219	{
1220	LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1221	pVM, cBasePages, cShadowPages, cFixedPages));
1222
1223	/*
1224	* Validate, get basics and take the semaphore.
1225	*/
1226	PGMM pGMM;
1227	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1228	PGVM pGVM;
1229	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
1230	if (RT_FAILURE(rc))
1231	return rc;
1232
1233	AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1234	AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1235	AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1236
1237	rc = RTSemFastMutexRequest(pGMM->Mtx);
1238	AssertRC(rc);
1239	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1240	{
1241	if ( pGVM->gmm.s.Reserved.cBasePages
1242	&& pGVM->gmm.s.Reserved.cFixedPages
1243	&& pGVM->gmm.s.Reserved.cShadowPages)
1244	{
1245	/*
1246	* Check if we can accomodate this.
1247	*/
1248	/* ... later ... */
1249	if (RT_SUCCESS(rc))
1250	{
1251	/*
1252	* Update the records.
1253	*/
1254	pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1255	+ pGVM->gmm.s.Reserved.cFixedPages
1256	+ pGVM->gmm.s.Reserved.cShadowPages;
1257	pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1258
1259	pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1260	pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1261	pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1262	}
1263	}
1264	else
1265	rc = VERR_WRONG_ORDER;
1266	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1267	}
1268	else
1269	rc = VERR_INTERNAL_ERROR_5;
1270	RTSemFastMutexRelease(pGMM->Mtx);
1271	LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1272	return rc;
1273	}
1274
1275
1276	/**
1277	* VMMR0 request wrapper for GMMR0UpdateReservation.
1278	*
1279	* @returns see GMMR0UpdateReservation.
1280	* @param pVM Pointer to the shared VM structure.
1281	* @param idCpu VCPU id
1282	* @param pReq The request packet.
1283	*/
1284	GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
1285	{
1286	/*
1287	* Validate input and pass it on.
1288	*/
1289	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1290	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1291	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
1292
1293	return GMMR0UpdateReservation(pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1294	}
1295
1296
1297	/**
1298	* Performs sanity checks on a free set.
1299	*
1300	* @returns Error count.
1301	*
1302	* @param pGMM Pointer to the GMM instance.
1303	* @param pSet Pointer to the set.
1304	* @param pszSetName The set name.
1305	* @param pszFunction The function from which it was called.
1306	* @param uLine The line number.
1307	*/
1308	static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
1309	const char *pszFunction, unsigned uLineNo)
1310	{
1311	uint32_t cErrors = 0;
1312
1313	/*
1314	* Count the free pages in all the chunks and match it against pSet->cFreePages.
1315	*/
1316	uint32_t cPages = 0;
1317	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1318	{
1319	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1320	{
1321	/** @todo check that the chunk is hash into the right set. */
1322	cPages += pCur->cFree;
1323	}
1324	}
1325	if (RT_UNLIKELY(cPages != pSet->cFreePages))
1326	{
1327	SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
1328	cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
1329	cErrors++;
1330	}
1331
1332	return cErrors;
1333	}
1334
1335
1336	/**
1337	* Performs some sanity checks on the GMM while owning lock.
1338	*
1339	* @returns Error count.
1340	*
1341	* @param pGMM Pointer to the GMM instance.
1342	* @param pszFunction The function from which it is called.
1343	* @param uLineNo The line number.
1344	*/
1345	static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
1346	{
1347	uint32_t cErrors = 0;
1348
1349	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Private, "private", pszFunction, uLineNo);
1350	cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
1351	/** @todo add more sanity checks. */
1352
1353	return cErrors;
1354	}
1355
1356
1357	/**
1358	* Looks up a chunk in the tree and fill in the TLB entry for it.
1359	*
1360	* This is not expected to fail and will bitch if it does.
1361	*
1362	* @returns Pointer to the allocation chunk, NULL if not found.
1363	* @param pGMM Pointer to the GMM instance.
1364	* @param idChunk The ID of the chunk to find.
1365	* @param pTlbe Pointer to the TLB entry.
1366	*/
1367	static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1368	{
1369	PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1370	AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1371	pTlbe->idChunk = idChunk;
1372	pTlbe->pChunk = pChunk;
1373	return pChunk;
1374	}
1375
1376
1377	/**
1378	* Finds a allocation chunk.
1379	*
1380	* This is not expected to fail and will bitch if it does.
1381	*
1382	* @returns Pointer to the allocation chunk, NULL if not found.
1383	* @param pGMM Pointer to the GMM instance.
1384	* @param idChunk The ID of the chunk to find.
1385	*/
1386	DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1387	{
1388	/*
1389	* Do a TLB lookup, branch if not in the TLB.
1390	*/
1391	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1392	if ( pTlbe->idChunk != idChunk
1393	\|\| !pTlbe->pChunk)
1394	return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1395	return pTlbe->pChunk;
1396	}
1397
1398
1399	/**
1400	* Finds a page.
1401	*
1402	* This is not expected to fail and will bitch if it does.
1403	*
1404	* @returns Pointer to the page, NULL if not found.
1405	* @param pGMM Pointer to the GMM instance.
1406	* @param idPage The ID of the page to find.
1407	*/
1408	DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1409	{
1410	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1411	if (RT_LIKELY(pChunk))
1412	return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1413	return NULL;
1414	}
1415
1416
1417	/**
1418	* Unlinks the chunk from the free list it's currently on (if any).
1419	*
1420	* @param pChunk The allocation chunk.
1421	*/
1422	DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1423	{
1424	PGMMCHUNKFREESET pSet = pChunk->pSet;
1425	if (RT_LIKELY(pSet))
1426	{
1427	pSet->cFreePages -= pChunk->cFree;
1428
1429	PGMMCHUNK pPrev = pChunk->pFreePrev;
1430	PGMMCHUNK pNext = pChunk->pFreeNext;
1431	if (pPrev)
1432	pPrev->pFreeNext = pNext;
1433	else
1434	pSet->apLists[(pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT] = pNext;
1435	if (pNext)
1436	pNext->pFreePrev = pPrev;
1437
1438	pChunk->pSet = NULL;
1439	pChunk->pFreeNext = NULL;
1440	pChunk->pFreePrev = NULL;
1441	}
1442	else
1443	{
1444	Assert(!pChunk->pFreeNext);
1445	Assert(!pChunk->pFreePrev);
1446	Assert(!pChunk->cFree);
1447	}
1448	}
1449
1450
1451	/**
1452	* Links the chunk onto the appropriate free list in the specified free set.
1453	*
1454	* If no free entries, it's not linked into any list.
1455	*
1456	* @param pChunk The allocation chunk.
1457	* @param pSet The free set.
1458	*/
1459	DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1460	{
1461	Assert(!pChunk->pSet);
1462	Assert(!pChunk->pFreeNext);
1463	Assert(!pChunk->pFreePrev);
1464
1465	if (pChunk->cFree > 0)
1466	{
1467	pChunk->pSet = pSet;
1468	pChunk->pFreePrev = NULL;
1469	unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1470	pChunk->pFreeNext = pSet->apLists[iList];
1471	if (pChunk->pFreeNext)
1472	pChunk->pFreeNext->pFreePrev = pChunk;
1473	pSet->apLists[iList] = pChunk;
1474
1475	pSet->cFreePages += pChunk->cFree;
1476	}
1477	}
1478
1479
1480	/**
1481	* Frees a Chunk ID.
1482	*
1483	* @param pGMM Pointer to the GMM instance.
1484	* @param idChunk The Chunk ID to free.
1485	*/
1486	static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1487	{
1488	AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1489	AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1490	ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1491	}
1492
1493
1494	/**
1495	* Allocates a new Chunk ID.
1496	*
1497	* @returns The Chunk ID.
1498	* @param pGMM Pointer to the GMM instance.
1499	*/
1500	static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1501	{
1502	AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1503	AssertCompile(NIL_GMM_CHUNKID == 0);
1504
1505	/*
1506	* Try the next sequential one.
1507	*/
1508	int32_t idChunk = ++pGMM->idChunkPrev;
1509	#if 0 /* test the fallback first */
1510	if ( idChunk <= GMM_CHUNKID_LAST
1511	&& idChunk > NIL_GMM_CHUNKID
1512	&& !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
1513	return idChunk;
1514	#endif
1515
1516	/*
1517	* Scan sequentially from the last one.
1518	*/
1519	if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1520	&& idChunk > NIL_GMM_CHUNKID)
1521	{
1522	idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1523	if (idChunk > NIL_GMM_CHUNKID)
1524	{
1525	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1526	return pGMM->idChunkPrev = idChunk;
1527	}
1528	}
1529
1530	/*
1531	* Ok, scan from the start.
1532	* We're not racing anyone, so there is no need to expect failures or have restart loops.
1533	*/
1534	idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1535	AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1536	AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1537
1538	return pGMM->idChunkPrev = idChunk;
1539	}
1540
1541
1542	/**
1543	* Registers a new chunk of memory.
1544	*
1545	* This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk. The caller
1546	* must own the global lock.
1547	*
1548	* @returns VBox status code.
1549	* @param pGMM Pointer to the GMM instance.
1550	* @param pSet Pointer to the set.
1551	* @param MemObj The memory object for the chunk.
1552	* @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1553	* affinity.
1554	* @param enmChunkType Chunk type (continuous or non-continuous)
1555	* @param ppChunk Chunk address (out)
1556	*/
1557	static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1558	{
1559	Assert(hGVM != NIL_GVM_HANDLE \|\| pGMM->fBoundMemoryMode);
1560
1561	int rc;
1562	PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1563	if (pChunk)
1564	{
1565	/*
1566	* Initialize it.
1567	*/
1568	pChunk->MemObj = MemObj;
1569	pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1570	pChunk->hGVM = hGVM;
1571	pChunk->iFreeHead = 0;
1572	pChunk->enmType = enmChunkType;
1573	for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1574	{
1575	pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1576	pChunk->aPages[iPage].Free.iNext = iPage + 1;
1577	}
1578	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1579	pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1580
1581	/*
1582	* Allocate a Chunk ID and insert it into the tree.
1583	* This has to be done behind the mutex of course.
1584	*/
1585	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1586	{
1587	pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1588	if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1589	&& pChunk->Core.Key <= GMM_CHUNKID_LAST
1590	&& RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1591	{
1592	pGMM->cChunks++;
1593	gmmR0LinkChunk(pChunk, pSet);
1594	LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1595
1596	if (ppChunk)
1597	*ppChunk = pChunk;
1598
1599	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
1600	return VINF_SUCCESS;
1601	}
1602
1603	/* bail out */
1604	rc = VERR_INTERNAL_ERROR;
1605	}
1606	else
1607	rc = VERR_INTERNAL_ERROR_5;
1608
1609	RTMemFree(pChunk);
1610	}
1611	else
1612	rc = VERR_NO_MEMORY;
1613	return rc;
1614	}
1615
1616
1617	/**
1618	* Allocate one new chunk and add it to the specified free set.
1619	*
1620	* @returns VBox status code.
1621	* @param pGMM Pointer to the GMM instance.
1622	* @param pSet Pointer to the set.
1623	* @param hGVM The affinity of the new chunk.
1624	* @param enmChunkType Chunk type (continuous or non-continuous)
1625	* @param ppChunk Chunk address (out)
1626	*
1627	* @remarks Called without owning the mutex.
1628	*/
1629	static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM, GMMCHUNKTYPE enmChunkType, PGMMCHUNK *ppChunk = NULL)
1630	{
1631	/*
1632	* Allocate the memory.
1633	*/
1634	RTR0MEMOBJ MemObj;
1635	int rc;
1636
1637	AssertCompile(GMM_CHUNK_SIZE == _2M);
1638	AssertReturn(enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS \|\| enmChunkType == GMMCHUNKTYPE_CONTINUOUS, VERR_INVALID_PARAMETER);
1639
1640	/* Leave the lock temporarily as the allocation might take long. */
1641	RTSemFastMutexRelease(pGMM->Mtx);
1642	if (enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS)
1643	rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
1644	else
1645	rc = RTR0MemObjAllocPhysEx(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
1646
1647	/* Grab the lock again. */
1648	int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1649	AssertRCReturn(rc2, rc2);
1650
1651	if (RT_SUCCESS(rc))
1652	{
1653	rc = gmmR0RegisterChunk(pGMM, pSet, MemObj, hGVM, enmChunkType, ppChunk);
1654	if (RT_FAILURE(rc))
1655	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
1656	}
1657	/** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
1658	* allocation failure. */
1659	return rc;
1660	}
1661
1662
1663	/**
1664	* Attempts to allocate more pages until the requested amount is met.
1665	*
1666	* @returns VBox status code.
1667	* @param pGMM Pointer to the GMM instance data.
1668	* @param pGVM The calling VM.
1669	* @param pSet Pointer to the free set to grow.
1670	* @param cPages The number of pages needed.
1671	*
1672	* @remarks Called owning the mutex, but will leave it temporarily while
1673	* allocating the memory!
1674	*/
1675	static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
1676	{
1677	Assert(!pGMM->fLegacyAllocationMode);
1678
1679	if (!GMM_CHECK_SANITY_IN_LOOPS(pGMM))
1680	return VERR_INTERNAL_ERROR_4;
1681
1682	if (!pGMM->fBoundMemoryMode)
1683	{
1684	/*
1685	* Try steal free chunks from the other set first. (Only take 100% free chunks.)
1686	*/
1687	PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
1688	while ( pSet->cFreePages < cPages
1689	&& pOtherSet->cFreePages >= GMM_CHUNK_NUM_PAGES)
1690	{
1691	PGMMCHUNK pChunk = pOtherSet->apLists[GMM_CHUNK_FREE_SET_LISTS - 1];
1692	while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1693	pChunk = pChunk->pFreeNext;
1694	if (!pChunk)
1695	break;
1696
1697	gmmR0UnlinkChunk(pChunk);
1698	gmmR0LinkChunk(pChunk, pSet);
1699	}
1700
1701	/*
1702	* If we need still more pages, allocate new chunks.
1703	* Note! We will leave the mutex while doing the allocation,
1704	*/
1705	while (pSet->cFreePages < cPages)
1706	{
1707	int rc = gmmR0AllocateOneChunk(pGMM, pSet, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
1708	if (RT_FAILURE(rc))
1709	return rc;
1710	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1711	return VERR_INTERNAL_ERROR_5;
1712	}
1713	}
1714	else
1715	{
1716	/*
1717	* The memory is bound to the VM allocating it, so we have to count
1718	* the free pages carefully as well as making sure we brand them with
1719	* our VM handle.
1720	*
1721	* Note! We will leave the mutex while doing the allocation,
1722	*/
1723	uint16_t const hGVM = pGVM->hSelf;
1724	for (;;)
1725	{
1726	/* Count and see if we've reached the goal. */
1727	uint32_t cPagesFound = 0;
1728	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1729	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1730	if (pCur->hGVM == hGVM)
1731	{
1732	cPagesFound += pCur->cFree;
1733	if (cPagesFound >= cPages)
1734	break;
1735	}
1736	if (cPagesFound >= cPages)
1737	break;
1738
1739	/* Allocate more. */
1740	int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM, GMMCHUNKTYPE_NON_CONTINUOUS);
1741	if (RT_FAILURE(rc))
1742	return rc;
1743	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
1744	return VERR_INTERNAL_ERROR_5;
1745	}
1746	}
1747
1748	return VINF_SUCCESS;
1749	}
1750
1751
1752	/**
1753	* Allocates one private page.
1754	*
1755	* Worker for gmmR0AllocatePages.
1756	*
1757	* @param pGMM Pointer to the GMM instance data.
1758	* @param hGVM The GVM handle of the VM requesting memory.
1759	* @param pChunk The chunk to allocate it from.
1760	* @param pPageDesc The page descriptor.
1761	*/
1762	static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1763	{
1764	/* update the chunk stats. */
1765	if (pChunk->hGVM == NIL_GVM_HANDLE)
1766	pChunk->hGVM = hGVM;
1767	Assert(pChunk->cFree);
1768	pChunk->cFree--;
1769	pChunk->cPrivate++;
1770
1771	/* unlink the first free page. */
1772	const uint32_t iPage = pChunk->iFreeHead;
1773	AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1774	PGMMPAGE pPage = &pChunk->aPages[iPage];
1775	Assert(GMM_PAGE_IS_FREE(pPage));
1776	pChunk->iFreeHead = pPage->Free.iNext;
1777	Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1778	pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage,
1779	pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1780
1781	/* make the page private. */
1782	pPage->u = 0;
1783	AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1784	pPage->Private.hGVM = hGVM;
1785	AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1786	AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1787	if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1788	pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1789	else
1790	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1791
1792	/* update the page descriptor. */
1793	pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->MemObj, iPage);
1794	Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1795	pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) \| iPage;
1796	pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1797	}
1798
1799
1800	/**
1801	* Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
1802	*
1803	* @returns VBox status code:
1804	* @retval VINF_SUCCESS on success.
1805	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
1806	* gmmR0AllocateMoreChunks is necessary.
1807	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1808	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1809	* that is we're trying to allocate more than we've reserved.
1810	*
1811	* @param pGMM Pointer to the GMM instance data.
1812	* @param pGVM Pointer to the shared VM structure.
1813	* @param cPages The number of pages to allocate.
1814	* @param paPages Pointer to the page descriptors.
1815	* See GMMPAGEDESC for details on what is expected on input.
1816	* @param enmAccount The account to charge.
1817	*/
1818	static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
1819	{
1820	/*
1821	* Check allocation limits.
1822	*/
1823	if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
1824	return VERR_GMM_HIT_GLOBAL_LIMIT;
1825
1826	switch (enmAccount)
1827	{
1828	case GMMACCOUNT_BASE:
1829	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
1830	{
1831	Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
1832	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, pGVM->gmm.s.cBalloonedPages, cPages));
1833	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1834	}
1835	break;
1836	case GMMACCOUNT_SHADOW:
1837	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
1838	{
1839	Log(("gmmR0AllocatePages:Shadow: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1840	pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
1841	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1842	}
1843	break;
1844	case GMMACCOUNT_FIXED:
1845	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
1846	{
1847	Log(("gmmR0AllocatePages:Fixed: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1848	pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
1849	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1850	}
1851	break;
1852	default:
1853	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1854	}
1855
1856	/*
1857	* Check if we need to allocate more memory or not. In bound memory mode this
1858	* is a bit extra work but it's easier to do it upfront than bailing out later.
1859	*/
1860	PGMMCHUNKFREESET pSet = &pGMM->Private;
1861	if (pSet->cFreePages < cPages)
1862	return VERR_GMM_SEED_ME;
1863	if (pGMM->fBoundMemoryMode)
1864	{
1865	uint16_t hGVM = pGVM->hSelf;
1866	uint32_t cPagesFound = 0;
1867	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1868	for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1869	if (pCur->hGVM == hGVM)
1870	{
1871	cPagesFound += pCur->cFree;
1872	if (cPagesFound >= cPages)
1873	break;
1874	}
1875	if (cPagesFound < cPages)
1876	return VERR_GMM_SEED_ME;
1877	}
1878
1879	/*
1880	* Pick the pages.
1881	* Try make some effort keeping VMs sharing private chunks.
1882	*/
1883	uint16_t hGVM = pGVM->hSelf;
1884	uint32_t iPage = 0;
1885
1886	/* first round, pick from chunks with an affinity to the VM. */
1887	for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
1888	{
1889	PGMMCHUNK pCurFree = NULL;
1890	PGMMCHUNK pCur = pSet->apLists[i];
1891	while (pCur && iPage < cPages)
1892	{
1893	PGMMCHUNK pNext = pCur->pFreeNext;
1894
1895	if ( pCur->hGVM == hGVM
1896	&& pCur->cFree < GMM_CHUNK_NUM_PAGES)
1897	{
1898	gmmR0UnlinkChunk(pCur);
1899	for (; pCur->cFree && iPage < cPages; iPage++)
1900	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1901	gmmR0LinkChunk(pCur, pSet);
1902	}
1903
1904	pCur = pNext;
1905	}
1906	}
1907
1908	if (iPage < cPages)
1909	{
1910	/* second round, pick pages from the 100% empty chunks we just skipped above. */
1911	PGMMCHUNK pCurFree = NULL;
1912	PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
1913	while (pCur && iPage < cPages)
1914	{
1915	PGMMCHUNK pNext = pCur->pFreeNext;
1916
1917	if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
1918	&& ( pCur->hGVM == hGVM
1919	\|\| !pGMM->fBoundMemoryMode))
1920	{
1921	gmmR0UnlinkChunk(pCur);
1922	for (; pCur->cFree && iPage < cPages; iPage++)
1923	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1924	gmmR0LinkChunk(pCur, pSet);
1925	}
1926
1927	pCur = pNext;
1928	}
1929	}
1930
1931	if ( iPage < cPages
1932	&& !pGMM->fBoundMemoryMode)
1933	{
1934	/* third round, disregard affinity. */
1935	unsigned i = RT_ELEMENTS(pSet->apLists);
1936	while (i-- > 0 && iPage < cPages)
1937	{
1938	PGMMCHUNK pCurFree = NULL;
1939	PGMMCHUNK pCur = pSet->apLists[i];
1940	while (pCur && iPage < cPages)
1941	{
1942	PGMMCHUNK pNext = pCur->pFreeNext;
1943
1944	if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
1945	&& cPages >= GMM_CHUNK_NUM_PAGES / 2)
1946	pCur->hGVM = hGVM; /* change chunk affinity */
1947
1948	gmmR0UnlinkChunk(pCur);
1949	for (; pCur->cFree && iPage < cPages; iPage++)
1950	gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1951	gmmR0LinkChunk(pCur, pSet);
1952
1953	pCur = pNext;
1954	}
1955	}
1956	}
1957
1958	/*
1959	* Update the account.
1960	*/
1961	switch (enmAccount)
1962	{
1963	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
1964	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
1965	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
1966	default:
1967	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1968	}
1969	pGVM->gmm.s.cPrivatePages += iPage;
1970	pGMM->cAllocatedPages += iPage;
1971
1972	AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
1973
1974	/*
1975	* Check if we've reached some threshold and should kick one or two VMs and tell
1976	* them to inflate their balloons a bit more... later.
1977	*/
1978
1979	return VINF_SUCCESS;
1980	}
1981
1982
1983	/**
1984	* Updates the previous allocations and allocates more pages.
1985	*
1986	* The handy pages are always taken from the 'base' memory account.
1987	* The allocated pages are not cleared and will contains random garbage.
1988	*
1989	* @returns VBox status code:
1990	* @retval VINF_SUCCESS on success.
1991	* @retval VERR_NOT_OWNER if the caller is not an EMT.
1992	* @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
1993	* @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
1994	* private page.
1995	* @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
1996	* shared page.
1997	* @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
1998	* owned by the VM.
1999	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2000	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2001	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2002	* that is we're trying to allocate more than we've reserved.
2003	*
2004	* @param pVM Pointer to the shared VM structure.
2005	* @param idCpu VCPU id
2006	* @param cPagesToUpdate The number of pages to update (starting from the head).
2007	* @param cPagesToAlloc The number of pages to allocate (starting from the head).
2008	* @param paPages The array of page descriptors.
2009	* See GMMPAGEDESC for details on what is expected on input.
2010	* @thread EMT.
2011	*/
2012	GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
2013	{
2014	LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
2015	pVM, cPagesToUpdate, cPagesToAlloc, paPages));
2016
2017	/*
2018	* Validate, get basics and take the semaphore.
2019	* (This is a relatively busy path, so make predictions where possible.)
2020	*/
2021	PGMM pGMM;
2022	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2023	PGVM pGVM;
2024	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2025	if (RT_FAILURE(rc))
2026	return rc;
2027
2028	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2029	AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
2030	\|\| (cPagesToAlloc && cPagesToAlloc < 1024),
2031	("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
2032	VERR_INVALID_PARAMETER);
2033
2034	unsigned iPage = 0;
2035	for (; iPage < cPagesToUpdate; iPage++)
2036	{
2037	AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2038	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
2039	\|\| paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2040	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
2041	("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
2042	VERR_INVALID_PARAMETER);
2043	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2044	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2045	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2046	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2047	/\|\| paPages[iPage].idSharedPage == NIL_GMM_PAGEID/,
2048	("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2049	}
2050
2051	for (; iPage < cPagesToAlloc; iPage++)
2052	{
2053	AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
2054	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2055	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2056	}
2057
2058	rc = RTSemFastMutexRequest(pGMM->Mtx);
2059	AssertRC(rc);
2060	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2061	{
2062
2063	/* No allocations before the initial reservation has been made! */
2064	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2065	&& pGVM->gmm.s.Reserved.cFixedPages
2066	&& pGVM->gmm.s.Reserved.cShadowPages))
2067	{
2068	/*
2069	* Perform the updates.
2070	* Stop on the first error.
2071	*/
2072	for (iPage = 0; iPage < cPagesToUpdate; iPage++)
2073	{
2074	if (paPages[iPage].idPage != NIL_GMM_PAGEID)
2075	{
2076	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
2077	if (RT_LIKELY(pPage))
2078	{
2079	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2080	{
2081	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2082	{
2083	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2084	if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
2085	pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
2086	else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
2087	pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
2088	/* else: NIL_RTHCPHYS nothing */
2089
2090	paPages[iPage].idPage = NIL_GMM_PAGEID;
2091	paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
2092	}
2093	else
2094	{
2095	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
2096	iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
2097	rc = VERR_GMM_NOT_PAGE_OWNER;
2098	break;
2099	}
2100	}
2101	else
2102	{
2103	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(pPage), pPage, pPage->Common.u2State));
2104	rc = VERR_GMM_PAGE_NOT_PRIVATE;
2105	break;
2106	}
2107	}
2108	else
2109	{
2110	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
2111	rc = VERR_GMM_PAGE_NOT_FOUND;
2112	break;
2113	}
2114	}
2115
2116	if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
2117	{
2118	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
2119	if (RT_LIKELY(pPage))
2120	{
2121	if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2122	{
2123	AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
2124	Assert(pPage->Shared.cRefs);
2125	Assert(pGVM->gmm.s.cSharedPages);
2126	Assert(pGVM->gmm.s.Allocated.cBasePages);
2127
2128	Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
2129	pGVM->gmm.s.cSharedPages--;
2130	pGVM->gmm.s.Allocated.cBasePages--;
2131	if (!--pPage->Shared.cRefs)
2132	{
2133	gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
2134	}
2135	else
2136	{
2137	Assert(pGMM->cDuplicatePages);
2138	pGMM->cDuplicatePages--;
2139	}
2140
2141	paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
2142	}
2143	else
2144	{
2145	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
2146	rc = VERR_GMM_PAGE_NOT_SHARED;
2147	break;
2148	}
2149	}
2150	else
2151	{
2152	Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
2153	rc = VERR_GMM_PAGE_NOT_FOUND;
2154	break;
2155	}
2156	}
2157	}
2158
2159	/*
2160	* Join paths with GMMR0AllocatePages for the allocation.
2161	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2162	*/
2163	while (RT_SUCCESS(rc))
2164	{
2165	rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
2166	if ( rc != VERR_GMM_SEED_ME
2167	\|\| pGMM->fLegacyAllocationMode)
2168	break;
2169	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
2170	}
2171	}
2172	else
2173	rc = VERR_WRONG_ORDER;
2174	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2175	}
2176	else
2177	rc = VERR_INTERNAL_ERROR_5;
2178	RTSemFastMutexRelease(pGMM->Mtx);
2179	LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2180	return rc;
2181	}
2182
2183
2184	/**
2185	* Allocate one or more pages.
2186	*
2187	* This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2188	* The allocated pages are not cleared and will contains random garbage.
2189	*
2190	* @returns VBox status code:
2191	* @retval VINF_SUCCESS on success.
2192	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2193	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2194	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2195	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2196	* that is we're trying to allocate more than we've reserved.
2197	*
2198	* @param pVM Pointer to the shared VM structure.
2199	* @param idCpu VCPU id
2200	* @param cPages The number of pages to allocate.
2201	* @param paPages Pointer to the page descriptors.
2202	* See GMMPAGEDESC for details on what is expected on input.
2203	* @param enmAccount The account to charge.
2204	*
2205	* @thread EMT.
2206	*/
2207	GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2208	{
2209	LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2210
2211	/*
2212	* Validate, get basics and take the semaphore.
2213	*/
2214	PGMM pGMM;
2215	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2216	PGVM pGVM;
2217	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2218	if (RT_FAILURE(rc))
2219	return rc;
2220
2221	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2222	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2223	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2224
2225	for (unsigned iPage = 0; iPage < cPages; iPage++)
2226	{
2227	AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2228	\|\| paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2229	\|\| ( enmAccount == GMMACCOUNT_BASE
2230	&& paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2231	&& !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2232	("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2233	VERR_INVALID_PARAMETER);
2234	AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2235	AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2236	}
2237
2238	rc = RTSemFastMutexRequest(pGMM->Mtx);
2239	AssertRC(rc);
2240	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2241	{
2242
2243	/* No allocations before the initial reservation has been made! */
2244	if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2245	&& pGVM->gmm.s.Reserved.cFixedPages
2246	&& pGVM->gmm.s.Reserved.cShadowPages))
2247	{
2248	/*
2249	* gmmR0AllocatePages seed loop.
2250	* Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2251	*/
2252	while (RT_SUCCESS(rc))
2253	{
2254	rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2255	if ( rc != VERR_GMM_SEED_ME
2256	\|\| pGMM->fLegacyAllocationMode)
2257	break;
2258	rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2259	}
2260	}
2261	else
2262	rc = VERR_WRONG_ORDER;
2263	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2264	}
2265	else
2266	rc = VERR_INTERNAL_ERROR_5;
2267	RTSemFastMutexRelease(pGMM->Mtx);
2268	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2269	return rc;
2270	}
2271
2272
2273	/**
2274	* VMMR0 request wrapper for GMMR0AllocatePages.
2275	*
2276	* @returns see GMMR0AllocatePages.
2277	* @param pVM Pointer to the shared VM structure.
2278	* @param idCpu VCPU id
2279	* @param pReq The request packet.
2280	*/
2281	GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
2282	{
2283	/*
2284	* Validate input and pass it on.
2285	*/
2286	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2287	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2288	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2289	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2290	VERR_INVALID_PARAMETER);
2291	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2292	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2293	VERR_INVALID_PARAMETER);
2294
2295	return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2296	}
2297
2298	/**
2299	* Allocate a large page to represent guest RAM
2300	*
2301	* The allocated pages are not cleared and will contains random garbage.
2302	*
2303	* @returns VBox status code:
2304	* @retval VINF_SUCCESS on success.
2305	* @retval VERR_NOT_OWNER if the caller is not an EMT.
2306	* @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2307	* @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2308	* @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2309	* that is we're trying to allocate more than we've reserved.
2310	* @returns see GMMR0AllocatePages.
2311	* @param pVM Pointer to the shared VM structure.
2312	* @param idCpu VCPU id
2313	* @param cbPage Large page size
2314	*/
2315	GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t pIdPage, RTHCPHYS pHCPhys)
2316	{
2317	LogFlow(("GMMR0AllocateLargePage: pVM=%p cbPage=%x\n", pVM, cbPage));
2318
2319	AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
2320	AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
2321	AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
2322
2323	/*
2324	* Validate, get basics and take the semaphore.
2325	*/
2326	PGMM pGMM;
2327	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2328	PGVM pGVM;
2329	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2330	if (RT_FAILURE(rc))
2331	return rc;
2332
2333	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2334	if (pGMM->fLegacyAllocationMode)
2335	return VERR_NOT_SUPPORTED;
2336
2337	*pHCPhys = NIL_RTHCPHYS;
2338	*pIdPage = NIL_GMM_PAGEID;
2339
2340	rc = RTSemFastMutexRequest(pGMM->Mtx);
2341	AssertRCReturn(rc, rc);
2342	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2343	{
2344	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2345	PGMMCHUNK pChunk;
2346	GMMPAGEDESC PageDesc;
2347
2348	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + pGVM->gmm.s.cBalloonedPages + cPages > pGVM->gmm.s.Reserved.cBasePages))
2349	{
2350	Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
2351	pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
2352	RTSemFastMutexRelease(pGMM->Mtx);
2353	return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
2354	}
2355
2356	/* Allocate a new continous chunk. */
2357	rc = gmmR0AllocateOneChunk(pGMM, &pGMM->Private, pGVM->hSelf, GMMCHUNKTYPE_CONTINUOUS, &pChunk);
2358	if (RT_FAILURE(rc))
2359	{
2360	RTSemFastMutexRelease(pGMM->Mtx);
2361	return rc;
2362	}
2363
2364	/* Unlink the new chunk from the free list. */
2365	gmmR0UnlinkChunk(pChunk);
2366
2367	/* Allocate all pages. */
2368	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2369	/* Return the first page as we'll use the whole chunk as one big page. */
2370	*pIdPage = PageDesc.idPage;
2371	*pHCPhys = PageDesc.HCPhysGCPhys;
2372
2373	for (unsigned i = 1; i < cPages; i++)
2374	gmmR0AllocatePage(pGMM, pGVM->hSelf, pChunk, &PageDesc);
2375
2376	/* Update accounting. */
2377	pGVM->gmm.s.Allocated.cBasePages += cPages;
2378	pGVM->gmm.s.cPrivatePages += cPages;
2379	pGMM->cAllocatedPages += cPages;
2380
2381	gmmR0LinkChunk(pChunk, &pGMM->Private);
2382	}
2383	else
2384	rc = VERR_INTERNAL_ERROR_5;
2385
2386	RTSemFastMutexRelease(pGMM->Mtx);
2387	LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2388	return rc;
2389	}
2390
2391
2392	/**
2393	* Free a large page
2394	*
2395	* @returns VBox status code:
2396	* @param pVM Pointer to the shared VM structure.
2397	* @param idCpu VCPU id
2398	* @param idPage Large page id
2399	*/
2400	GMMR0DECL(int) GMMR0FreeLargePage(PVM pVM, VMCPUID idCpu, uint32_t idPage)
2401	{
2402	LogFlow(("GMMR0FreeLargePage: pVM=%p idPage=%x\n", pVM, idPage));
2403
2404	/*
2405	* Validate, get basics and take the semaphore.
2406	*/
2407	PGMM pGMM;
2408	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2409	PGVM pGVM;
2410	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2411	if (RT_FAILURE(rc))
2412	return rc;
2413
2414	/* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
2415	if (pGMM->fLegacyAllocationMode)
2416	return VERR_NOT_SUPPORTED;
2417
2418	rc = RTSemFastMutexRequest(pGMM->Mtx);
2419	AssertRC(rc);
2420	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2421	{
2422	const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
2423
2424	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2425	{
2426	Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2427	RTSemFastMutexRelease(pGMM->Mtx);
2428	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2429	}
2430
2431	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2432	if ( RT_LIKELY(pPage)
2433	&& RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2434	{
2435	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2436	Assert(pChunk);
2437	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2438	Assert(pChunk->cPrivate > 0);
2439
2440	/* Release the memory immediately. */
2441	gmmR0FreeChunk(pGMM, NULL, pChunk);
2442
2443	/* Update accounting. */
2444	pGVM->gmm.s.Allocated.cBasePages -= cPages;
2445	pGVM->gmm.s.cPrivatePages -= cPages;
2446	pGMM->cAllocatedPages -= cPages;
2447	}
2448	else
2449	rc = VERR_GMM_PAGE_NOT_FOUND;
2450	}
2451	else
2452	rc = VERR_INTERNAL_ERROR_5;
2453
2454	RTSemFastMutexRelease(pGMM->Mtx);
2455	LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
2456	return rc;
2457	}
2458
2459
2460	/**
2461	* VMMR0 request wrapper for GMMR0FreeLargePage.
2462	*
2463	* @returns see GMMR0FreeLargePage.
2464	* @param pVM Pointer to the shared VM structure.
2465	* @param idCpu VCPU id
2466	* @param pReq The request packet.
2467	*/
2468	GMMR0DECL(int) GMMR0FreeLargePageReq(PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
2469	{
2470	/*
2471	* Validate input and pass it on.
2472	*/
2473	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2474	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2475	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
2476	("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
2477	VERR_INVALID_PARAMETER);
2478
2479	return GMMR0FreeLargePage(pVM, idCpu, pReq->idPage);
2480	}
2481
2482	/**
2483	* Frees a chunk, giving it back to the host OS.
2484	*
2485	* @param pGMM Pointer to the GMM instance.
2486	* @param pGVM This is set when called from GMMR0CleanupVM so we can
2487	* unmap and free the chunk in one go.
2488	* @param pChunk The chunk to free.
2489	*/
2490	static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2491	{
2492	Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2493
2494	/*
2495	* Cleanup hack! Unmap the chunk from the callers address space.
2496	*/
2497	if ( pChunk->cMappings
2498	&& pGVM)
2499	gmmR0UnmapChunk(pGMM, pGVM, pChunk);
2500
2501	/*
2502	* If there are current mappings of the chunk, then request the
2503	* VMs to unmap them. Reposition the chunk in the free list so
2504	* it won't be a likely candidate for allocations.
2505	*/
2506	if (pChunk->cMappings)
2507	{
2508	/** @todo R0 -> VM request */
2509	/* The chunk can be owned by more than one VM if fBoundMemoryMode is false! */
2510	Log(("gmmR0FreeChunk: chunk still has %d mappings; don't free!\n", pChunk->cMappings));
2511	}
2512	else
2513	{
2514	/*
2515	* Try free the memory object.
2516	*/
2517	int rc = RTR0MemObjFree(pChunk->MemObj, false /* fFreeMappings */);
2518	if (RT_SUCCESS(rc))
2519	{
2520	pChunk->MemObj = NIL_RTR0MEMOBJ;
2521
2522	/*
2523	* Unlink it from everywhere.
2524	*/
2525	gmmR0UnlinkChunk(pChunk);
2526
2527	PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2528	Assert(pCore == &pChunk->Core); NOREF(pCore);
2529
2530	PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2531	if (pTlbe->pChunk == pChunk)
2532	{
2533	pTlbe->idChunk = NIL_GMM_CHUNKID;
2534	pTlbe->pChunk = NULL;
2535	}
2536
2537	Assert(pGMM->cChunks > 0);
2538	pGMM->cChunks--;
2539
2540	/*
2541	* Free the Chunk ID and struct.
2542	*/
2543	gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2544	pChunk->Core.Key = NIL_GMM_CHUNKID;
2545
2546	RTMemFree(pChunk->paMappings);
2547	pChunk->paMappings = NULL;
2548
2549	RTMemFree(pChunk);
2550	}
2551	else
2552	AssertRC(rc);
2553	}
2554	}
2555
2556
2557	/**
2558	* Free page worker.
2559	*
2560	* The caller does all the statistic decrementing, we do all the incrementing.
2561	*
2562	* @param pGMM Pointer to the GMM instance data.
2563	* @param pChunk Pointer to the chunk this page belongs to.
2564	* @param idPage The Page ID.
2565	* @param pPage Pointer to the page.
2566	*/
2567	static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2568	{
2569	Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2570	pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2571
2572	/*
2573	* Put the page on the free list.
2574	*/
2575	pPage->u = 0;
2576	pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2577	Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) \|\| pChunk->iFreeHead == UINT16_MAX);
2578	pPage->Free.iNext = pChunk->iFreeHead;
2579	pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2580
2581	/*
2582	* Update statistics (the cShared/cPrivate stats are up to date already),
2583	* and relink the chunk if necessary.
2584	*/
2585	if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
2586	{
2587	gmmR0UnlinkChunk(pChunk);
2588	pChunk->cFree++;
2589	gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2590	}
2591	else
2592	{
2593	pChunk->cFree++;
2594	pChunk->pSet->cFreePages++;
2595
2596	/*
2597	* If the chunk becomes empty, consider giving memory back to the host OS.
2598	*
2599	* The current strategy is to try give it back if there are other chunks
2600	* in this free list, meaning if there are at least 240 free pages in this
2601	* category. Note that since there are probably mappings of the chunk,
2602	* it won't be freed up instantly, which probably screws up this logic
2603	* a bit...
2604	*/
2605	if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
2606	&& pChunk->pFreeNext
2607	&& pChunk->pFreePrev
2608	&& !pGMM->fLegacyAllocationMode))
2609	gmmR0FreeChunk(pGMM, NULL, pChunk);
2610	}
2611	}
2612
2613
2614	/**
2615	* Frees a shared page, the page is known to exist and be valid and such.
2616	*
2617	* @param pGMM Pointer to the GMM instance.
2618	* @param idPage The Page ID
2619	* @param pPage The page structure.
2620	*/
2621	DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2622	{
2623	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2624	Assert(pChunk);
2625	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2626	Assert(pChunk->cShared > 0);
2627	Assert(pGMM->cSharedPages > 0);
2628	Assert(pGMM->cAllocatedPages > 0);
2629	Assert(!pPage->Shared.cRefs);
2630
2631	pChunk->cShared--;
2632	pGMM->cAllocatedPages--;
2633	pGMM->cSharedPages--;
2634	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2635	}
2636
2637	#ifdef VBOX_WITH_PAGE_SHARING
2638	/**
2639	* Converts a private page to a shared page, the page is known to exist and be valid and such.
2640	*
2641	* @param pGMM Pointer to the GMM instance.
2642	* @param pGVM Pointer to the GVM instance.
2643	* @param HCPhys Host physical address
2644	* @param idPage The Page ID
2645	* @param pPage The page structure.
2646	*/
2647	DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage)
2648	{
2649	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2650	Assert(pChunk);
2651	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2652	Assert(GMM_PAGE_IS_PRIVATE(pPage));
2653
2654	pChunk->cPrivate--;
2655	pChunk->cShared++;
2656
2657	pGMM->cSharedPages++;
2658
2659	pGVM->gmm.s.cSharedPages++;
2660	pGVM->gmm.s.cPrivatePages--;
2661
2662	/* Modify the page structure. */
2663	pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
2664	pPage->Shared.cRefs = 1;
2665	pPage->Common.u2State = GMM_PAGE_STATE_SHARED;
2666	}
2667
2668	/**
2669	* Increase the use count of a shared page, the page is known to exist and be valid and such.
2670	*
2671	* @param pGMM Pointer to the GMM instance.
2672	* @param pGVM Pointer to the GVM instance.
2673	* @param pPage The page structure.
2674	*/
2675	DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
2676	{
2677	Assert(pGMM->cSharedPages > 0);
2678	Assert(pGMM->cAllocatedPages > 0);
2679
2680	pGMM->cDuplicatePages++;
2681
2682	pPage->Shared.cRefs++;
2683	pGVM->gmm.s.cSharedPages++;
2684	pGVM->gmm.s.Allocated.cBasePages++;
2685	}
2686	#endif
2687
2688	/**
2689	* Frees a private page, the page is known to exist and be valid and such.
2690	*
2691	* @param pGMM Pointer to the GMM instance.
2692	* @param idPage The Page ID
2693	* @param pPage The page structure.
2694	*/
2695	DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2696	{
2697	PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2698	Assert(pChunk);
2699	Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2700	Assert(pChunk->cPrivate > 0);
2701	Assert(pGMM->cAllocatedPages > 0);
2702
2703	pChunk->cPrivate--;
2704	pGMM->cAllocatedPages--;
2705	gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2706	}
2707
2708	/**
2709	* Common worker for GMMR0FreePages and GMMR0BalloonedPages.
2710	*
2711	* @returns VBox status code:
2712	* @retval xxx
2713	*
2714	* @param pGMM Pointer to the GMM instance data.
2715	* @param pGVM Pointer to the shared VM structure.
2716	* @param cPages The number of pages to free.
2717	* @param paPages Pointer to the page descriptors.
2718	* @param enmAccount The account this relates to.
2719	*/
2720	static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2721	{
2722	/*
2723	* Check that the request isn't impossible wrt to the account status.
2724	*/
2725	switch (enmAccount)
2726	{
2727	case GMMACCOUNT_BASE:
2728	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2729	{
2730	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2731	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2732	}
2733	break;
2734	case GMMACCOUNT_SHADOW:
2735	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
2736	{
2737	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
2738	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2739	}
2740	break;
2741	case GMMACCOUNT_FIXED:
2742	if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
2743	{
2744	Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
2745	return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2746	}
2747	break;
2748	default:
2749	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2750	}
2751
2752	/*
2753	* Walk the descriptors and free the pages.
2754	*
2755	* Statistics (except the account) are being updated as we go along,
2756	* unlike the alloc code. Also, stop on the first error.
2757	*/
2758	int rc = VINF_SUCCESS;
2759	uint32_t iPage;
2760	for (iPage = 0; iPage < cPages; iPage++)
2761	{
2762	uint32_t idPage = paPages[iPage].idPage;
2763	PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2764	if (RT_LIKELY(pPage))
2765	{
2766	if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2767	{
2768	if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2769	{
2770	Assert(pGVM->gmm.s.cPrivatePages);
2771	pGVM->gmm.s.cPrivatePages--;
2772	gmmR0FreePrivatePage(pGMM, idPage, pPage);
2773	}
2774	else
2775	{
2776	Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
2777	pPage->Private.hGVM, pGVM->hSelf));
2778	rc = VERR_GMM_NOT_PAGE_OWNER;
2779	break;
2780	}
2781	}
2782	else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2783	{
2784	Assert(pGVM->gmm.s.cSharedPages);
2785	pGVM->gmm.s.cSharedPages--;
2786	Assert(pPage->Shared.cRefs);
2787	if (!--pPage->Shared.cRefs)
2788	{
2789	gmmR0FreeSharedPage(pGMM, idPage, pPage);
2790	}
2791	else
2792	{
2793	Assert(pGMM->cDuplicatePages);
2794	pGMM->cDuplicatePages--;
2795	}
2796	}
2797	else
2798	{
2799	Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
2800	rc = VERR_GMM_PAGE_ALREADY_FREE;
2801	break;
2802	}
2803	}
2804	else
2805	{
2806	Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
2807	rc = VERR_GMM_PAGE_NOT_FOUND;
2808	break;
2809	}
2810	paPages[iPage].idPage = NIL_GMM_PAGEID;
2811	}
2812
2813	/*
2814	* Update the account.
2815	*/
2816	switch (enmAccount)
2817	{
2818	case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
2819	case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
2820	case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
2821	default:
2822	AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2823	}
2824
2825	/*
2826	* Any threshold stuff to be done here?
2827	*/
2828
2829	return rc;
2830	}
2831
2832
2833	/**
2834	* Free one or more pages.
2835	*
2836	* This is typically used at reset time or power off.
2837	*
2838	* @returns VBox status code:
2839	* @retval xxx
2840	*
2841	* @param pVM Pointer to the shared VM structure.
2842	* @param idCpu VCPU id
2843	* @param cPages The number of pages to allocate.
2844	* @param paPages Pointer to the page descriptors containing the Page IDs for each page.
2845	* @param enmAccount The account this relates to.
2846	* @thread EMT.
2847	*/
2848	GMMR0DECL(int) GMMR0FreePages(PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2849	{
2850	LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2851
2852	/*
2853	* Validate input and get the basics.
2854	*/
2855	PGMM pGMM;
2856	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2857	PGVM pGVM;
2858	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2859	if (RT_FAILURE(rc))
2860	return rc;
2861
2862	AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2863	AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2864	AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2865
2866	for (unsigned iPage = 0; iPage < cPages; iPage++)
2867	AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2868	/\|\| paPages[iPage].idPage == NIL_GMM_PAGEID/,
2869	("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2870
2871	/*
2872	* Take the semaphore and call the worker function.
2873	*/
2874	rc = RTSemFastMutexRequest(pGMM->Mtx);
2875	AssertRC(rc);
2876	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2877	{
2878	rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
2879	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
2880	}
2881	else
2882	rc = VERR_INTERNAL_ERROR_5;
2883	RTSemFastMutexRelease(pGMM->Mtx);
2884	LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
2885	return rc;
2886	}
2887
2888
2889	/**
2890	* VMMR0 request wrapper for GMMR0FreePages.
2891	*
2892	* @returns see GMMR0FreePages.
2893	* @param pVM Pointer to the shared VM structure.
2894	* @param idCpu VCPU id
2895	* @param pReq The request packet.
2896	*/
2897	GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
2898	{
2899	/*
2900	* Validate input and pass it on.
2901	*/
2902	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2903	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2904	AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
2905	("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
2906	VERR_INVALID_PARAMETER);
2907	AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
2908	("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
2909	VERR_INVALID_PARAMETER);
2910
2911	return GMMR0FreePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2912	}
2913
2914
2915	/**
2916	* Report back on a memory ballooning request.
2917	*
2918	* The request may or may not have been initiated by the GMM. If it was initiated
2919	* by the GMM it is important that this function is called even if no pages were
2920	* ballooned.
2921	*
2922	* @returns VBox status code:
2923	* @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
2924	* @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
2925	* @retval VERR_GMM_OVERCOMMITED_TRY_AGAIN_IN_A_BIT - reset condition
2926	* indicating that we won't necessarily have sufficient RAM to boot
2927	* the VM again and that it should pause until this changes (we'll try
2928	* balloon some other VM). (For standard deflate we have little choice
2929	* but to hope the VM won't use the memory that was returned to it.)
2930	*
2931	* @param pVM Pointer to the shared VM structure.
2932	* @param idCpu VCPU id
2933	* @param enmAction Inflate/deflate/reset
2934	* @param cBalloonedPages The number of pages that was ballooned.
2935	*
2936	* @thread EMT.
2937	*/
2938	GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
2939	{
2940	LogFlow(("GMMR0BalloonedPages: pVM=%p enmAction=%d cBalloonedPages=%#x\n",
2941	pVM, enmAction, cBalloonedPages));
2942
2943	AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
2944
2945	/*
2946	* Validate input and get the basics.
2947	*/
2948	PGMM pGMM;
2949	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2950	PGVM pGVM;
2951	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
2952	if (RT_FAILURE(rc))
2953	return rc;
2954
2955	/*
2956	* Take the sempahore and do some more validations.
2957	*/
2958	rc = RTSemFastMutexRequest(pGMM->Mtx);
2959	AssertRC(rc);
2960	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
2961	{
2962	switch (enmAction)
2963	{
2964	case GMMBALLOONACTION_INFLATE:
2965	{
2966	if (pGVM->gmm.s.Allocated.cBasePages >= cBalloonedPages)
2967	{
2968	/*
2969	* Record the ballooned memory.
2970	*/
2971	pGMM->cBalloonedPages += cBalloonedPages;
2972	if (pGVM->gmm.s.cReqBalloonedPages)
2973	{
2974	/* Codepath never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions.. */
2975	AssertFailed();
2976
2977	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2978	pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
2979	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
2980	pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
2981	}
2982	else
2983	{
2984	pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2985	Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2986	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2987	}
2988	}
2989	else
2990	rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2991	break;
2992	}
2993
2994	case GMMBALLOONACTION_DEFLATE:
2995	{
2996	/* Deflate. */
2997	if (pGVM->gmm.s.cBalloonedPages >= cBalloonedPages)
2998	{
2999	/*
3000	* Record the ballooned memory.
3001	*/
3002	Assert(pGMM->cBalloonedPages >= cBalloonedPages);
3003	pGMM->cBalloonedPages -= cBalloonedPages;
3004	pGVM->gmm.s.cBalloonedPages -= cBalloonedPages;
3005	if (pGVM->gmm.s.cReqDeflatePages)
3006	{
3007	AssertFailed(); /* This is path is for later. */
3008	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
3009	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
3010
3011	/*
3012	* Anything we need to do here now when the request has been completed?
3013	*/
3014	pGVM->gmm.s.cReqDeflatePages = 0;
3015	}
3016	else
3017	Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
3018	cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
3019	}
3020	else
3021	rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
3022	break;
3023	}
3024
3025	case GMMBALLOONACTION_RESET:
3026	{
3027	/* Reset to an empty balloon. */
3028	Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
3029
3030	pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
3031	pGVM->gmm.s.cBalloonedPages = 0;
3032	break;
3033	}
3034
3035	default:
3036	rc = VERR_INVALID_PARAMETER;
3037	break;
3038	}
3039	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3040	}
3041	else
3042	rc = VERR_INTERNAL_ERROR_5;
3043
3044	RTSemFastMutexRelease(pGMM->Mtx);
3045	LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
3046	return rc;
3047	}
3048
3049
3050	/**
3051	* VMMR0 request wrapper for GMMR0BalloonedPages.
3052	*
3053	* @returns see GMMR0BalloonedPages.
3054	* @param pVM Pointer to the shared VM structure.
3055	* @param idCpu VCPU id
3056	* @param pReq The request packet.
3057	*/
3058	GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
3059	{
3060	/*
3061	* Validate input and pass it on.
3062	*/
3063	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3064	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3065	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
3066	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
3067	VERR_INVALID_PARAMETER);
3068
3069	return GMMR0BalloonedPages(pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
3070	}
3071
3072	/**
3073	* Return memory statistics for the hypervisor
3074	*
3075	* @returns VBox status code:
3076	* @param pVM Pointer to the shared VM structure.
3077	* @param pReq The request packet.
3078	*/
3079	GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PVM pVM, PGMMMEMSTATSREQ pReq)
3080	{
3081	/*
3082	* Validate input and pass it on.
3083	*/
3084	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3085	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3086	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3087	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3088	VERR_INVALID_PARAMETER);
3089
3090	/*
3091	* Validate input and get the basics.
3092	*/
3093	PGMM pGMM;
3094	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3095	pReq->cAllocPages = pGMM->cAllocatedPages;
3096	pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT- PAGE_SHIFT)) - pGMM->cAllocatedPages;
3097	pReq->cBalloonedPages = pGMM->cBalloonedPages;
3098	pReq->cMaxPages = pGMM->cMaxPages;
3099	pReq->cSharedPages = pGMM->cDuplicatePages;
3100	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3101
3102	return VINF_SUCCESS;
3103	}
3104
3105	/**
3106	* Return memory statistics for the VM
3107	*
3108	* @returns VBox status code:
3109	* @param pVM Pointer to the shared VM structure.
3110	* @parma idCpu Cpu id.
3111	* @param pReq The request packet.
3112	*/
3113	GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
3114	{
3115	/*
3116	* Validate input and pass it on.
3117	*/
3118	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3119	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3120	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
3121	("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
3122	VERR_INVALID_PARAMETER);
3123
3124	/*
3125	* Validate input and get the basics.
3126	*/
3127	PGMM pGMM;
3128	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3129	PGVM pGVM;
3130	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3131	if (RT_FAILURE(rc))
3132	return rc;
3133
3134	/*
3135	* Take the sempahore and do some more validations.
3136	*/
3137	rc = RTSemFastMutexRequest(pGMM->Mtx);
3138	AssertRC(rc);
3139	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3140	{
3141	pReq->cAllocPages = pGVM->gmm.s.Allocated.cBasePages;
3142	pReq->cBalloonedPages = pGVM->gmm.s.cBalloonedPages;
3143	pReq->cMaxPages = pGVM->gmm.s.Reserved.cBasePages;
3144	pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
3145	}
3146	else
3147	rc = VERR_INTERNAL_ERROR_5;
3148
3149	RTSemFastMutexRelease(pGMM->Mtx);
3150	LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
3151	return rc;
3152	}
3153
3154	/**
3155	* Unmaps a chunk previously mapped into the address space of the current process.
3156	*
3157	* @returns VBox status code.
3158	* @param pGMM Pointer to the GMM instance data.
3159	* @param pGVM Pointer to the Global VM structure.
3160	* @param pChunk Pointer to the chunk to be unmapped.
3161	*/
3162	static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
3163	{
3164	if (!pGMM->fLegacyAllocationMode)
3165	{
3166	/*
3167	* Find the mapping and try unmapping it.
3168	*/
3169	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3170	{
3171	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3172	if (pChunk->paMappings[i].pGVM == pGVM)
3173	{
3174	/* unmap */
3175	int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
3176	if (RT_SUCCESS(rc))
3177	{
3178	/* update the record. */
3179	pChunk->cMappings--;
3180	if (i < pChunk->cMappings)
3181	pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
3182	pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
3183	pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
3184	}
3185	return rc;
3186	}
3187	}
3188	}
3189	else if (pChunk->hGVM == pGVM->hSelf)
3190	return VINF_SUCCESS;
3191
3192	Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
3193	return VERR_GMM_CHUNK_NOT_MAPPED;
3194	}
3195
3196
3197	/**
3198	* Maps a chunk into the user address space of the current process.
3199	*
3200	* @returns VBox status code.
3201	* @param pGMM Pointer to the GMM instance data.
3202	* @param pGVM Pointer to the Global VM structure.
3203	* @param pChunk Pointer to the chunk to be mapped.
3204	* @param ppvR3 Where to store the ring-3 address of the mapping.
3205	* In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will be
3206	* contain the address of the existing mapping.
3207	*/
3208	static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3209	{
3210	/*
3211	* If we're in legacy mode this is simple.
3212	*/
3213	if (pGMM->fLegacyAllocationMode)
3214	{
3215	if (pChunk->hGVM != pGVM->hSelf)
3216	{
3217	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3218	return VERR_GMM_CHUNK_NOT_FOUND;
3219	}
3220
3221	*ppvR3 = RTR0MemObjAddressR3(pChunk->MemObj);
3222	return VINF_SUCCESS;
3223	}
3224
3225	/*
3226	* Check to see if the chunk is already mapped.
3227	*/
3228	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3229	{
3230	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3231	if (pChunk->paMappings[i].pGVM == pGVM)
3232	{
3233	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3234	Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
3235	#ifdef VBOX_WITH_PAGE_SHARING
3236	/* The ring-3 chunk cache can be out of sync; don't fail. */
3237	return VINF_SUCCESS;
3238	#else
3239	return VERR_GMM_CHUNK_ALREADY_MAPPED;
3240	#endif
3241	}
3242	}
3243
3244	/*
3245	* Do the mapping.
3246	*/
3247	RTR0MEMOBJ MapObj;
3248	int rc = RTR0MemObjMapUser(&MapObj, pChunk->MemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3249	if (RT_SUCCESS(rc))
3250	{
3251	/* reallocate the array? */
3252	if ((pChunk->cMappings & 1 /7/) == 0)
3253	{
3254	void pvMappings = RTMemRealloc(pChunk->paMappings, (pChunk->cMappings + 2 /8/) sizeof(pChunk->paMappings[0]));
3255	if (RT_UNLIKELY(!pvMappings))
3256	{
3257	rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
3258	AssertRC(rc);
3259	return VERR_NO_MEMORY;
3260	}
3261	pChunk->paMappings = (PGMMCHUNKMAP)pvMappings;
3262	}
3263
3264	/* insert new entry */
3265	pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
3266	pChunk->paMappings[pChunk->cMappings].pGVM = pGVM;
3267	pChunk->cMappings++;
3268
3269	*ppvR3 = RTR0MemObjAddressR3(MapObj);
3270	}
3271
3272	return rc;
3273	}
3274
3275	/**
3276	* Check if a chunk is mapped into the specified VM
3277	*
3278	* @returns mapped yes/no
3279	* @param pGVM Pointer to the Global VM structure.
3280	* @param pChunk Pointer to the chunk to be mapped.
3281	* @param ppvR3 Where to store the ring-3 address of the mapping.
3282	*/
3283	static int gmmR0IsChunkMapped(PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
3284	{
3285	/*
3286	* Check to see if the chunk is already mapped.
3287	*/
3288	for (uint32_t i = 0; i < pChunk->cMappings; i++)
3289	{
3290	Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
3291	if (pChunk->paMappings[i].pGVM == pGVM)
3292	{
3293	*ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
3294	return true;
3295	}
3296	}
3297	*ppvR3 = NULL;
3298	return false;
3299	}
3300
3301	/**
3302	* Map a chunk and/or unmap another chunk.
3303	*
3304	* The mapping and unmapping applies to the current process.
3305	*
3306	* This API does two things because it saves a kernel call per mapping when
3307	* when the ring-3 mapping cache is full.
3308	*
3309	* @returns VBox status code.
3310	* @param pVM The VM.
3311	* @param idCpu VCPU id
3312	* @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
3313	* @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
3314	* @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
3315	* @thread EMT
3316	*/
3317	GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, VMCPUID idCpu, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
3318	{
3319	LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
3320	pVM, idChunkMap, idChunkUnmap, ppvR3));
3321
3322	/*
3323	* Validate input and get the basics.
3324	*/
3325	PGMM pGMM;
3326	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3327	PGVM pGVM;
3328	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3329	if (RT_FAILURE(rc))
3330	return rc;
3331
3332	AssertCompile(NIL_GMM_CHUNKID == 0);
3333	AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
3334	AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
3335
3336	if ( idChunkMap == NIL_GMM_CHUNKID
3337	&& idChunkUnmap == NIL_GMM_CHUNKID)
3338	return VERR_INVALID_PARAMETER;
3339
3340	if (idChunkMap != NIL_GMM_CHUNKID)
3341	{
3342	AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3343	*ppvR3 = NIL_RTR3PTR;
3344	}
3345
3346	/*
3347	* Take the semaphore and do the work.
3348	*
3349	* The unmapping is done last since it's easier to undo a mapping than
3350	* undoing an unmapping. The ring-3 mapping cache cannot not be so big
3351	* that it pushes the user virtual address space to within a chunk of
3352	* it it's limits, so, no problem here.
3353	*/
3354	rc = RTSemFastMutexRequest(pGMM->Mtx);
3355	AssertRC(rc);
3356	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3357	{
3358	PGMMCHUNK pMap = NULL;
3359	if (idChunkMap != NIL_GVM_HANDLE)
3360	{
3361	pMap = gmmR0GetChunk(pGMM, idChunkMap);
3362	if (RT_LIKELY(pMap))
3363	rc = gmmR0MapChunk(pGMM, pGVM, pMap, ppvR3);
3364	else
3365	{
3366	Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
3367	rc = VERR_GMM_CHUNK_NOT_FOUND;
3368	}
3369	}
3370
3371	if ( idChunkUnmap != NIL_GMM_CHUNKID
3372	&& RT_SUCCESS(rc))
3373	{
3374	PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
3375	if (RT_LIKELY(pUnmap))
3376	rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap);
3377	else
3378	{
3379	Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
3380	rc = VERR_GMM_CHUNK_NOT_FOUND;
3381	}
3382
3383	if (RT_FAILURE(rc) && pMap)
3384	gmmR0UnmapChunk(pGMM, pGVM, pMap);
3385	}
3386
3387	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3388	}
3389	else
3390	rc = VERR_INTERNAL_ERROR_5;
3391	RTSemFastMutexRelease(pGMM->Mtx);
3392
3393	LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
3394	return rc;
3395	}
3396
3397
3398	/**
3399	* VMMR0 request wrapper for GMMR0MapUnmapChunk.
3400	*
3401	* @returns see GMMR0MapUnmapChunk.
3402	* @param pVM Pointer to the shared VM structure.
3403	* @param idCpu VCPU id
3404	* @param pReq The request packet.
3405	*/
3406	GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, VMCPUID idCpu, PGMMMAPUNMAPCHUNKREQ pReq)
3407	{
3408	/*
3409	* Validate input and pass it on.
3410	*/
3411	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3412	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3413	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3414
3415	return GMMR0MapUnmapChunk(pVM, idCpu, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
3416	}
3417
3418
3419	/**
3420	* Legacy mode API for supplying pages.
3421	*
3422	* The specified user address points to a allocation chunk sized block that
3423	* will be locked down and used by the GMM when the GM asks for pages.
3424	*
3425	* @returns VBox status code.
3426	* @param pVM The VM.
3427	* @param idCpu VCPU id
3428	* @param pvR3 Pointer to the chunk size memory block to lock down.
3429	*/
3430	GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
3431	{
3432	/*
3433	* Validate input and get the basics.
3434	*/
3435	PGMM pGMM;
3436	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3437	PGVM pGVM;
3438	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3439	if (RT_FAILURE(rc))
3440	return rc;
3441
3442	AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
3443	AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
3444
3445	if (!pGMM->fLegacyAllocationMode)
3446	{
3447	Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
3448	return VERR_NOT_SUPPORTED;
3449	}
3450
3451	/*
3452	* Lock the memory before taking the semaphore.
3453	*/
3454	RTR0MEMOBJ MemObj;
3455	rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ \| RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
3456	if (RT_SUCCESS(rc))
3457	{
3458	/* Grab the lock. */
3459	rc = RTSemFastMutexRequest(pGMM->Mtx);
3460	AssertRCReturn(rc, rc);
3461
3462	/*
3463	* Add a new chunk with our hGVM.
3464	*/
3465	rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
3466	RTSemFastMutexRelease(pGMM->Mtx);
3467
3468	if (RT_FAILURE(rc))
3469	RTR0MemObjFree(MemObj, false /* fFreeMappings */);
3470	}
3471
3472	LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
3473	return rc;
3474	}
3475
3476
3477	/**
3478	* Registers a new shared module for the VM
3479	*
3480	* @returns VBox status code.
3481	* @param pVM VM handle
3482	* @param idCpu VCPU id
3483	* @param enmGuestOS Guest OS type
3484	* @param pszModuleName Module name
3485	* @param pszVersion Module version
3486	* @param GCBaseAddr Module base address
3487	* @param cbModule Module size
3488	* @param cRegions Number of shared region descriptors
3489	* @param pRegions Shared region(s)
3490	*/
3491	GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule,
3492	unsigned cRegions, VMMDEVSHAREDREGIONDESC *pRegions)
3493	{
3494	#ifdef VBOX_WITH_PAGE_SHARING
3495	/*
3496	* Validate input and get the basics.
3497	*/
3498	PGMM pGMM;
3499	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3500	PGVM pGVM;
3501	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3502	if (RT_FAILURE(rc))
3503	return rc;
3504
3505	Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3506
3507	/*
3508	* Take the sempahore and do some more validations.
3509	*/
3510	rc = RTSemFastMutexRequest(pGMM->Mtx);
3511	AssertRC(rc);
3512	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3513	{
3514	bool fNewModule = false;
3515
3516	/* Check if this module is already locally registered. */
3517	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3518	if (!pRecVM)
3519	{
3520	pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(sizeof(*pRecVM));
3521	if (!pRecVM)
3522	{
3523	AssertFailed();
3524	rc = VERR_NO_MEMORY;
3525	goto end;
3526	}
3527	pRecVM->Core.Key = GCBaseAddr;
3528
3529	bool ret = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
3530	Assert(ret);
3531
3532	Log(("GMMR0RegisterSharedModule: new local module %s\n", pszModuleName));
3533	fNewModule = true;
3534	}
3535	else
3536	rc = VINF_PGM_SHARED_MODULE_ALREADY_REGISTERED;
3537
3538	/* Check if this module is already globally registered. */
3539	PGMMSHAREDMODULE pGlobalModule = (PGMMSHAREDMODULE)RTAvlGCPtrGet(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3540	if (!pGlobalModule)
3541	{
3542	Assert(fNewModule);
3543	Assert(!pRecVM->fCollision);
3544
3545	pGlobalModule = (PGMMSHAREDMODULE)RTMemAllocZ(RT_OFFSETOF(GMMSHAREDMODULE, aRegions[cRegions]));
3546	if (!pGlobalModule)
3547	{
3548	AssertFailed();
3549	rc = VERR_NO_MEMORY;
3550	goto end;
3551	}
3552
3553	pGlobalModule->Core.Key = GCBaseAddr;
3554	pGlobalModule->cbModule = cbModule;
3555	/* Input limit already safe; no need to check again. */
3556	/** todo replace with RTStrCopy */
3557	strcpy(pGlobalModule->szName, pszModuleName);
3558	strcpy(pGlobalModule->szVersion, pszVersion);
3559
3560	pGlobalModule->enmGuestOS = enmGuestOS;
3561	pGlobalModule->cRegions = cRegions;
3562
3563	for (unsigned i = 0; i < cRegions; i++)
3564	{
3565	Log(("New region %d base=%RGv size %x\n", i, pRegions[i].GCRegionAddr, pRegions[i].cbRegion));
3566	pGlobalModule->aRegions[i].GCRegionAddr = pRegions[i].GCRegionAddr;
3567	pGlobalModule->aRegions[i].cbRegion = pRegions[i].cbRegion;
3568	pGlobalModule->aRegions[i].u32Alignment = 0;
3569	pGlobalModule->aRegions[i].paHCPhysPageID = NULL; /* uninitialized. */
3570	}
3571
3572	/* Save reference. */
3573	pRecVM->pGlobalModule = pGlobalModule;
3574	pRecVM->fCollision = false;
3575	pGlobalModule->cUsers++;
3576	rc = VINF_SUCCESS;
3577
3578	bool ret = RTAvlGCPtrInsert(&pGMM->pGlobalSharedModuleTree, &pGlobalModule->Core);
3579	Assert(ret);
3580
3581	Log(("GMMR0RegisterSharedModule: new global module %s\n", pszModuleName));
3582	}
3583	else
3584	{
3585	Assert(pGlobalModule->cUsers > 0);
3586
3587	/* Make sure the name and version are identical. */
3588	/** todo replace with RTStrNCmp */
3589	if ( !strcmp(pGlobalModule->szName, pszModuleName)
3590	&& !strcmp(pGlobalModule->szVersion, pszVersion))
3591	{
3592	/* Save reference. */
3593	pRecVM->pGlobalModule = pGlobalModule;
3594	if ( fNewModule
3595	\|\| pRecVM->fCollision == true) /* colliding module unregistered and new one registerd since the last check */
3596	{
3597	pGlobalModule->cUsers++;
3598	Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers));
3599	}
3600	pRecVM->fCollision = false;
3601	rc = VINF_SUCCESS;
3602	}
3603	else
3604	{
3605	Log(("GMMR0RegisterSharedModule: module %s collision!\n", pszModuleName));
3606	pRecVM->fCollision = true;
3607	rc = VINF_PGM_SHARED_MODULE_COLLISION;
3608	goto end;
3609	}
3610	}
3611
3612	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3613	}
3614	else
3615	rc = VERR_INTERNAL_ERROR_5;
3616
3617	end:
3618	RTSemFastMutexRelease(pGMM->Mtx);
3619	return rc;
3620	#else
3621	return VERR_NOT_IMPLEMENTED;
3622	#endif
3623	}
3624
3625
3626	/**
3627	* VMMR0 request wrapper for GMMR0RegisterSharedModule.
3628	*
3629	* @returns see GMMR0RegisterSharedModule.
3630	* @param pVM Pointer to the shared VM structure.
3631	* @param idCpu VCPU id
3632	* @param pReq The request packet.
3633	*/
3634	GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
3635	{
3636	/*
3637	* Validate input and pass it on.
3638	*/
3639	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3640	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3641	AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3642
3643	/* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
3644	pReq->rc = GMMR0RegisterSharedModule(pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
3645	return VINF_SUCCESS;
3646	}
3647
3648	/**
3649	* Unregisters a shared module for the VM
3650	*
3651	* @returns VBox status code.
3652	* @param pVM VM handle
3653	* @param idCpu VCPU id
3654	* @param pszModuleName Module name
3655	* @param pszVersion Module version
3656	* @param GCBaseAddr Module base address
3657	* @param cbModule Module size
3658	*/
3659	GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char pszModuleName, char pszVersion, RTGCPTR GCBaseAddr, uint32_t cbModule)
3660	{
3661	#ifdef VBOX_WITH_PAGE_SHARING
3662	/*
3663	* Validate input and get the basics.
3664	*/
3665	PGMM pGMM;
3666	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3667	PGVM pGVM;
3668	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3669	if (RT_FAILURE(rc))
3670	return rc;
3671
3672	Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule));
3673
3674	/*
3675	* Take the sempahore and do some more validations.
3676	*/
3677	rc = RTSemFastMutexRequest(pGMM->Mtx);
3678	AssertRC(rc);
3679	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3680	{
3681	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3682	if (!pRecVM)
3683	{
3684	rc = VERR_PGM_SHARED_MODULE_NOT_FOUND;
3685	goto end;
3686	}
3687	/* Remove reference to global shared module. */
3688	if (!pRecVM->fCollision)
3689	{
3690	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3691	Assert(pRec);
3692
3693	if (pRec) /* paranoia */
3694	{
3695	Assert(pRec->cUsers);
3696	pRec->cUsers--;
3697	if (pRec->cUsers == 0)
3698	{
3699	/* Free the ranges, but leave the pages intact as there might still be references; they will be cleared by the COW mechanism. */
3700	for (unsigned i = 0; i < pRec->cRegions; i++)
3701	if (pRec->aRegions[i].paHCPhysPageID)
3702	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3703
3704	/* Remove from the tree and free memory. */
3705	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, GCBaseAddr);
3706	RTMemFree(pRec);
3707	}
3708	}
3709	else
3710	rc = VERR_PGM_SHARED_MODULE_REGISTRATION_INCONSISTENCY;
3711	}
3712	else
3713	Assert(!pRecVM->pGlobalModule);
3714
3715	/* Remove from the tree and free memory. */
3716	RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, GCBaseAddr);
3717	RTMemFree(pRecVM);
3718
3719	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3720	}
3721	else
3722	rc = VERR_INTERNAL_ERROR_5;
3723
3724	end:
3725	RTSemFastMutexRelease(pGMM->Mtx);
3726	return rc;
3727	#else
3728	return VERR_NOT_IMPLEMENTED;
3729	#endif
3730	}
3731
3732	/**
3733	* VMMR0 request wrapper for GMMR0UnregisterSharedModule.
3734	*
3735	* @returns see GMMR0UnregisterSharedModule.
3736	* @param pVM Pointer to the shared VM structure.
3737	* @param idCpu VCPU id
3738	* @param pReq The request packet.
3739	*/
3740	GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
3741	{
3742	/*
3743	* Validate input and pass it on.
3744	*/
3745	AssertPtrReturn(pVM, VERR_INVALID_POINTER);
3746	AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3747	AssertMsgReturn(pReq->Hdr.cbReq == sizeof(pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(pReq)), VERR_INVALID_PARAMETER);
3748
3749	return GMMR0UnregisterSharedModule(pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
3750	}
3751
3752
3753	#ifdef VBOX_WITH_PAGE_SHARING
3754	/**
3755	* Checks specified shared module range for changes
3756	*
3757	* Performs the following tasks:
3758	* - if a shared page is new, then it changes the GMM page type to shared and returns it in the paPageDesc array
3759	* - if a shared page already exists, then it checks if the VM page is identical and if so frees the VM page and returns the shared page in the paPageDesc array
3760	*
3761	* Note: assumes the caller has acquired the GMM semaphore!!
3762	*
3763	* @returns VBox status code.
3764	* @param pGMM Pointer to the GMM instance data.
3765	* @param pGVM Pointer to the GVM instance data.
3766	* @param pModule Module description
3767	* @param idxRegion Region index
3768	* @param cPages Number of entries in the paPageDesc array
3769	* @param paPageDesc Page descriptor array (in/out)
3770	*/
3771	GMMR0DECL(int) GMMR0SharedModuleCheckRange(PGVM pGVM, PGMMSHAREDMODULE pModule, unsigned idxRegion, unsigned cPages, PGMMSHAREDPAGEDESC paPageDesc)
3772	{
3773	int rc = VINF_SUCCESS;
3774	PGMM pGMM;
3775	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3776
3777	AssertReturn(idxRegion < pModule->cRegions, VERR_INVALID_PARAMETER);
3778	AssertReturn(cPages == (pModule->aRegions[idxRegion].cbRegion >> PAGE_SHIFT), VERR_INVALID_PARAMETER);
3779
3780	Log(("GMMR0SharedModuleCheckRange %s base %RGv region %d cPages %d\n", pModule->szName, pModule->Core.Key, idxRegion, cPages));
3781
3782	PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
3783
3784	if (!pGlobalRegion->paHCPhysPageID)
3785	{
3786	/* First time; create a page descriptor array. */
3787	Log(("Allocate page descriptor array for %d pages\n", cPages));
3788	pGlobalRegion->paHCPhysPageID = (uint32_t )RTMemAlloc(cPages sizeof(*pGlobalRegion->paHCPhysPageID));
3789	if (!pGlobalRegion->paHCPhysPageID)
3790	{
3791	AssertFailed();
3792	rc = VERR_NO_MEMORY;
3793	goto end;
3794	}
3795	/* Invalidate all descriptors. */
3796	for (unsigned i = 0; i < cPages; i++)
3797	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3798	}
3799
3800	/* Check all pages in the region. */
3801	for (unsigned i = 0; i < cPages; i++)
3802	{
3803	/* Valid page present? */
3804	if (paPageDesc[i].uHCPhysPageId != NIL_GMM_PAGEID)
3805	{
3806	/* We've seen this shared page for the first time? */
3807	if (pGlobalRegion->paHCPhysPageID[i] == NIL_GMM_PAGEID)
3808	{
3809	new_shared_page:
3810	Log(("New shared page guest %RGp host %RHp\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys));
3811
3812	/* Easy case: just change the internal page type. */
3813	PGMMPAGE pPage = gmmR0GetPage(pGMM, paPageDesc[i].uHCPhysPageId);
3814	if (!pPage)
3815	{
3816	AssertFailed();
3817	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3818	goto end;
3819	}
3820
3821	AssertMsg(paPageDesc[i].GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", paPageDesc[i].HCPhys, (pPage->Private.pfn << 12)));
3822
3823	gmmR0ConvertToSharedPage(pGMM, pGVM, paPageDesc[i].HCPhys, paPageDesc[i].uHCPhysPageId, pPage);
3824
3825	/* Keep track of these references. */
3826	pGlobalRegion->paHCPhysPageID[i] = paPageDesc[i].uHCPhysPageId;
3827	}
3828	else
3829	{
3830	uint8_t pbLocalPage, pbSharedPage;
3831	uint8_t *pbChunk;
3832	PGMMCHUNK pChunk;
3833
3834	Assert(paPageDesc[i].uHCPhysPageId != pGlobalRegion->paHCPhysPageID[i]);
3835
3836	Log(("Replace existing page guest %RGp host %RHp id %x -> id %x\n", paPageDesc[i].GCPhys, paPageDesc[i].HCPhys, paPageDesc[i].uHCPhysPageId, pGlobalRegion->paHCPhysPageID[i]));
3837
3838	/* Get the shared page source. */
3839	PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paHCPhysPageID[i]);
3840	if (!pPage)
3841	{
3842	AssertFailed();
3843	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3844	goto end;
3845	}
3846	if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
3847	{
3848	/* Page was freed at some point; invalidate this entry. */
3849	/** todo this isn't really bullet proof. */
3850	Log(("Old shared page was freed -> create a new one\n"));
3851	pGlobalRegion->paHCPhysPageID[i] = NIL_GMM_PAGEID;
3852	goto new_shared_page; /* ugly goto */
3853	}
3854
3855	Log(("Replace existing page guest host %RHp -> %RHp\n", paPageDesc[i].HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
3856
3857	/* Calculate the virtual address of the local page. */
3858	pChunk = gmmR0GetChunk(pGMM, paPageDesc[i].uHCPhysPageId >> GMM_CHUNKID_SHIFT);
3859	if (pChunk)
3860	{
3861	if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3862	{
3863	AssertFailed();
3864	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3865	goto end;
3866	}
3867	pbLocalPage = pbChunk + ((paPageDesc[i].uHCPhysPageId & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3868	}
3869	else
3870	{
3871	AssertFailed();
3872	rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
3873	goto end;
3874	}
3875
3876	/* Calculate the virtual address of the shared page. */
3877	pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paHCPhysPageID[i] >> GMM_CHUNKID_SHIFT);
3878	Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
3879
3880	/* Get the virtual address of the physical page; map the chunk into the VM process if not already done. */
3881	if (!gmmR0IsChunkMapped(pGVM, pChunk, (PRTR3PTR)&pbChunk))
3882	{
3883	Log(("Map chunk into process!\n"));
3884	rc = gmmR0MapChunk(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk);
3885	if (rc != VINF_SUCCESS)
3886	{
3887	AssertRC(rc);
3888	goto end;
3889	}
3890	}
3891	pbSharedPage = pbChunk + ((pGlobalRegion->paHCPhysPageID[i] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
3892
3893	/** todo write ASMMemComparePage. */
3894	if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
3895	{
3896	Log(("Unexpected differences found between local and shared page; skip\n"));
3897	/* Signal to the caller that this one hasn't changed. */
3898	paPageDesc[i].uHCPhysPageId = NIL_GMM_PAGEID;
3899	continue;
3900	}
3901
3902	/* Free the old local page. */
3903	GMMFREEPAGEDESC PageDesc;
3904
3905	PageDesc.idPage = paPageDesc[i].uHCPhysPageId;
3906	rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
3907	AssertRC(rc);
3908
3909	gmmR0UseSharedPage(pGMM, pGVM, pPage);
3910
3911	/* Pass along the new physical address & page id. */
3912	paPageDesc[i].HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
3913	paPageDesc[i].uHCPhysPageId = pGlobalRegion->paHCPhysPageID[i];
3914	}
3915	}
3916	}
3917	end:
3918	return rc;
3919	}
3920
3921	/**
3922	* RTAvlU32Destroy callback.
3923	*
3924	* @returns 0
3925	* @param pNode The node to destroy.
3926	* @param pvGVM The GVM handle.
3927	*/
3928	static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
3929	{
3930	PGVM pGVM = (PGVM)pvGVM;
3931	PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
3932	PGMM pGMM;
3933	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3934
3935	Assert(pRecVM->pGlobalModule \|\| pRecVM->fCollision);
3936	if (pRecVM->pGlobalModule)
3937	{
3938	PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
3939	Assert(pRec);
3940	Assert(pRec->cUsers);
3941
3942	Log(("gmmR0CleanupSharedModule: %s %s cUsers=%d\n", pRec->szName, pRec->szVersion, pRec->cUsers));
3943	pRec->cUsers--;
3944	if (pRec->cUsers == 0)
3945	{
3946	for (unsigned i = 0; i < pRec->cRegions; i++)
3947	if (pRec->aRegions[i].paHCPhysPageID)
3948	RTMemFree(pRec->aRegions[i].paHCPhysPageID);
3949
3950	/* Remove from the tree and free memory. */
3951	RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
3952	RTMemFree(pRec);
3953	}
3954	}
3955	RTMemFree(pRecVM);
3956	return 0;
3957	}
3958	#endif
3959
3960	/**
3961	* Removes all shared modules for the specified VM
3962	*
3963	* @returns VBox status code.
3964	* @param pVM VM handle
3965	* @param idCpu VCPU id
3966	*/
3967	GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu)
3968	{
3969	#ifdef VBOX_WITH_PAGE_SHARING
3970	/*
3971	* Validate input and get the basics.
3972	*/
3973	PGMM pGMM;
3974	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
3975	PGVM pGVM;
3976	int rc = GVMMR0ByVMAndEMT(pVM, idCpu, &pGVM);
3977	if (RT_FAILURE(rc))
3978	return rc;
3979
3980	/*
3981	* Take the sempahore and do some more validations.
3982	*/
3983	rc = RTSemFastMutexRequest(pGMM->Mtx);
3984	AssertRC(rc);
3985	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
3986	{
3987	Log(("GMMR0ResetSharedModules\n"));
3988	RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
3989
3990	rc = VINF_SUCCESS;
3991	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
3992	}
3993	else
3994	rc = VERR_INTERNAL_ERROR_5;
3995
3996	RTSemFastMutexRelease(pGMM->Mtx);
3997	return rc;
3998	#else
3999	return VERR_NOT_IMPLEMENTED;
4000	#endif
4001	}
4002
4003	#ifdef VBOX_WITH_PAGE_SHARING
4004	typedef struct
4005	{
4006	PGVM pGVM;
4007	VMCPUID idCpu;
4008	} GMMCHECKSHAREDMODULEINFO, *PGMMCHECKSHAREDMODULEINFO;
4009
4010	/**
4011	* Tree enumeration callback for checking a shared module.
4012	*/
4013	DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
4014	{
4015	PGMMCHECKSHAREDMODULEINFO pInfo = (PGMMCHECKSHAREDMODULEINFO)pvUser;
4016	PGMMSHAREDMODULEPERVM pLocalModule = (PGMMSHAREDMODULEPERVM)pNode;
4017	PGMMSHAREDMODULE pGlobalModule = pLocalModule->pGlobalModule;
4018
4019	if ( !pLocalModule->fCollision
4020	&& pGlobalModule)
4021	{
4022	Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x collision=%d\n", pGlobalModule->szName, pGlobalModule->szVersion, pGlobalModule->Core.Key, pGlobalModule->cbModule, pLocalModule->fCollision));
4023	PGMR0SharedModuleCheckRegion(pInfo->pGVM->pVM, pInfo->idCpu, pGlobalModule, pInfo->pGVM);
4024	}
4025	return 0;
4026	}
4027	#endif
4028
4029	#ifdef DEBUG_sandervl
4030	/**
4031	* Setup for a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4032	*
4033	* @returns VBox status code.
4034	* @param pVM VM handle
4035	*/
4036	GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM)
4037	{
4038	/*
4039	* Validate input and get the basics.
4040	*/
4041	PGMM pGMM;
4042	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4043
4044	/*
4045	* Take the sempahore and do some more validations.
4046	*/
4047	int rc = RTSemFastMutexRequest(pGMM->Mtx);
4048	AssertRC(rc);
4049	if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4050	rc = VERR_INTERNAL_ERROR_5;
4051	else
4052	rc = VINF_SUCCESS;
4053
4054	return rc;
4055	}
4056
4057	/**
4058	* Clean up after a GMMR0CheckSharedModules call (to allow log flush jumps back to ring 3)
4059	*
4060	* @returns VBox status code.
4061	* @param pVM VM handle
4062	*/
4063	GMMR0DECL(int) GMMR0CheckSharedModulesEnd(PVM pVM)
4064	{
4065	/*
4066	* Validate input and get the basics.
4067	*/
4068	PGMM pGMM;
4069	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4070
4071	RTSemFastMutexRelease(pGMM->Mtx);
4072	return VINF_SUCCESS;
4073	}
4074	#endif
4075
4076	/**
4077	* Check all shared modules for the specified VM
4078	*
4079	* @returns VBox status code.
4080	* @param pVM VM handle
4081	* @param pVCpu VMCPU handle
4082	*/
4083	GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu)
4084	{
4085	#ifdef VBOX_WITH_PAGE_SHARING
4086	/*
4087	* Validate input and get the basics.
4088	*/
4089	PGMM pGMM;
4090	GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
4091	PGVM pGVM;
4092	int rc = GVMMR0ByVMAndEMT(pVM, pVCpu->idCpu, &pGVM);
4093	if (RT_FAILURE(rc))
4094	return rc;
4095
4096	# ifndef DEBUG_sandervl
4097	/*
4098	* Take the sempahore and do some more validations.
4099	*/
4100	rc = RTSemFastMutexRequest(pGMM->Mtx);
4101	AssertRC(rc);
4102	# endif
4103	if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
4104	{
4105	GMMCHECKSHAREDMODULEINFO Info;
4106
4107	Log(("GMMR0CheckSharedModules\n"));
4108	Info.pGVM = pGVM;
4109	Info.idCpu = pVCpu->idCpu;
4110
4111	RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Info);
4112
4113	Log(("GMMR0CheckSharedModules done!\n"));
4114	rc = VINF_SUCCESS;
4115	GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
4116	}
4117	else
4118	rc = VERR_INTERNAL_ERROR_5;
4119
4120	# ifndef DEBUG_sandervl
4121	RTSemFastMutexRelease(pGMM->Mtx);
4122	# endif
4123	return rc;
4124	#else
4125	return VERR_NOT_IMPLEMENTED;
4126	#endif
4127	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 29719

Download in other formats: