VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GMMR0.cpp@ 18992

Last change on this file since 18992 was 18870, checked in by vboxsync, 16 years ago

GMMR0: Fixed out of memory problem on Windows by separating the legacy and bound allocation mode and use IPRT with bound mode on Windows. This way the behavior doesn't change except for the way we allocate the chunks, thereby reducing the risk.

1/* $Id: GMMR0.cpp 18870 2009-04-11 21:15:59Z vboxsync $ */
2/** @file
3 * GMM - Global Memory Manager.
4 */
5
6/*
7 * Copyright (C) 2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/** @page pg_gmm GMM - The Global Memory Manager
24 *
25 * As the name indicates, this component is responsible for global memory
26 * management. Currently only guest RAM is allocated from the GMM, but this
27 * may change to include shadow page tables and other bits later.
28 *
29 * Guest RAM is managed as individual pages, but allocated from the host OS
30 * in chunks for reasons of portability / efficiency. To minimize the memory
31 * footprint all tracking structures must be as small as possible without
32 * unnecessary performance penalties.
33 *
34 * The allocation chunks have a fixed size, defined at compile time
35 * by the #GMM_CHUNK_SIZE \#define.
36 *
37 * Each chunk is given a unique ID. Each page also has a unique ID. The
38 * relationship between the two IDs is:
39 * @code
40 * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
41 * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
42 * @endcode
43 * Where iPage is the index of the page within the chunk. This ID scheme
44 * permits efficient chunk and page lookup, but it relies on the chunk size
45 * to be set at compile time. The chunks are organized in an AVL tree with their
46 * IDs being the keys.
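 *
 * A quick illustrative sketch (not a quote from the code) of the inverse
 * mapping, which simply reverses the shift and mask:
 * @code
 * idChunk = idPage >> GMM_CHUNK_SHIFT;
 * iPage   = idPage & ((1 << GMM_CHUNK_SHIFT) - 1);
 * @endcode
 * The chunk is then located via the AVL tree keyed on idChunk and the page by
 * indexing the chunk's page array with iPage (gmmR0GetPage below does exactly
 * this using the GMM_CHUNKID_SHIFT / GMM_PAGEID_IDX_MASK constants).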
47 *
48 * The physical address of each page in an allocation chunk is maintained by
49 * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
50 * need to duplicate this information (it would cost 8 bytes per page if we did).
51 *
52 * So what do we need to track per page? Most importantly we need to know
53 * which state the page is in:
54 * - Private - Allocated for (eventually) backing one particular VM page.
55 * - Shared - Readonly page that is used by one or more VMs and treated
56 * as COW by PGM.
57 * - Free - Not used by anyone.
58 *
59 * For the page replacement operations (sharing, defragmenting and freeing)
60 * to be somewhat efficient, private pages need to be associated with a
61 * particular page in a particular VM.
62 *
63 * Tracking the usage of shared pages is impractical and expensive, so we'll
64 * settle for a reference counting system instead.
65 *
66 * Free pages will be chained on LIFOs.
67 *
68 * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
69 * systems a 32-bit bitfield will have to suffice because of address space
70 * limitations. The #GMMPAGE structure shows the details.
71 *
72 *
73 * @section sec_gmm_alloc_strat Page Allocation Strategy
74 *
75 * The strategy for allocating pages has to take fragmentation and shared
76 * pages into account, or we may end up with 2000 chunks with only
77 * a few pages in each. Shared pages cannot easily be reallocated because
78 * of the inaccurate usage accounting (see above). Private pages can be
79 * reallocated by a defragmentation thread in the same manner that sharing
80 * is done.
81 *
82 * The first approach is to manage the free pages in two sets depending on
83 * whether they are mainly for the allocation of shared or private pages.
84 * In the initial implementation there will be almost no possibility for
85 * mixing shared and private pages in the same chunk (only if we're really
86 * stressed on memory), but when we implement forking of VMs and have to
87 * deal with lots of COW pages it'll start getting kind of interesting.
88 *
89 * The sets are lists of chunks with approximately the same number of
90 * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
91 * consists of 16 lists. So, the first list will contain the chunks with
92 * 1-16 free pages, the second covers 17-32, and so on. The chunks will be
93 * moved between the lists as pages are freed up or allocated.
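 *
 * A minimal sketch of the bucketing, assuming the GMM_CHUNK_FREE_SET_SHIFT
 * scheme used further down in this file:
 * @code
 * iList = (cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
 * @endcode
 * With a shift count of 4 each list thus covers a bucket of 16 free-page
 * counts; gmmR0LinkChunk and gmmR0UnlinkChunk move chunks between the lists
 * as cFree changes.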
94 *
95 *
96 * @section sec_gmm_costs Costs
97 *
98 * The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ
99 * entails. In addition there is the chunk cost of approximately
100 * (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
101 *
102 * On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit Windows
103 * and 64-bit on 64-bit Windows (a PFN_NUMBER in the MDL). So, 64-bit per page.
104 * The cost on Linux is identical, but here it's because of sizeof(struct page *).
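 *
 * A rough worked example (the byte counts are illustrative assumptions and
 * sizeof(CHUNK) is read as the fixed part outside the page array): with the
 * 1MB chunk from the example above, 2^CHUNK_SHIFT = 256, so something like
 * (64 + 448) / 256 = 2 bytes of chunk overhead per page.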
105 *
106 *
107 * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
108 *
109 * In legacy mode the page source is locked user pages and not
110 * #RTR0MemObjAllocPhysNC, which means that a page can only be allocated
111 * by the VM that locked it. We will make no attempt at implementing
112 * page sharing on these systems, just do enough to make it all work.
113 *
114 *
115 * @subsection sub_gmm_locking Serializing
116 *
117 * One simple fast mutex will be employed in the initial implementation, not
118 * two as mentioned in @ref subsec_pgmPhys_Serializing.
119 *
120 * @see @ref subsec_pgmPhys_Serializing
121 *
122 *
123 * @section sec_gmm_overcommit Memory Over-Commitment Management
124 *
125 * The GVM will have to do the system wide memory over-commitment
126 * management. My current ideas are:
127 * - Per VM oc policy that indicates how much to initially commit
128 * to it and what to do in an out-of-memory situation.
129 * - Prevent overtaxing the host.
130 *
131 * There are some challenges here; the main ones are configurability and
132 * security. Should we for instance permit anyone to request 100% memory
133 * commitment? Who should be allowed to do runtime adjustments of the
134 * config? And how do we prevent these settings from being lost when the last
135 * VM process exits? The solution is probably to have an optional root
136 * daemon that will keep VMMR0.r0 in memory and enable the security measures.
137 *
138 *
139 *
140 * @section sec_gmm_numa NUMA
141 *
142 * NUMA considerations will be designed and implemented a bit later.
143 *
144 * The preliminary guess is that we will have to try to allocate memory as
145 * close as possible to the CPUs the VM is executed on (EMT and additional CPU
146 * threads), which means it's mostly about allocation and sharing policies.
147 * Both the scheduler and allocator interface will have to supply some NUMA info
148 * and we'll need to have a way to calculate access costs.
149 *
150 */
151
152
153/*******************************************************************************
154* Header Files *
155*******************************************************************************/
156#define LOG_GROUP LOG_GROUP_GMM
157#include <VBox/gmm.h>
158#include "GMMR0Internal.h"
159#include <VBox/gvm.h>
160#include <VBox/log.h>
161#include <VBox/param.h>
162#include <VBox/err.h>
163#include <iprt/avl.h>
164#include <iprt/mem.h>
165#include <iprt/memobj.h>
166#include <iprt/semaphore.h>
167#include <iprt/string.h>
168
169
170/*******************************************************************************
171* Structures and Typedefs *
172*******************************************************************************/
173/** Pointer to set of free chunks. */
174typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
175
176/** Pointer to a GMM allocation chunk. */
177typedef struct GMMCHUNK *PGMMCHUNK;
178
179/**
180 * The per-page tracking structure employed by the GMM.
181 *
182 * On 32-bit hosts some trickery is necessary to compress all
183 * the information into 32 bits. When the fSharedFree member is set,
184 * the 30th bit decides whether it's a free page or not.
185 *
186 * Because of the different layout on 32-bit and 64-bit hosts, macros
187 * are used to get and set some of the data.
188 */
189typedef union GMMPAGE
190{
191#if HC_ARCH_BITS == 64
192 /** Unsigned integer view. */
193 uint64_t u;
194
195 /** The common view. */
196 struct GMMPAGECOMMON
197 {
198 uint32_t uStuff1 : 32;
199 uint32_t uStuff2 : 30;
200 /** The page state. */
201 uint32_t u2State : 2;
202 } Common;
203
204 /** The view of a private page. */
205 struct GMMPAGEPRIVATE
206 {
207 /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
208 uint32_t pfn;
209 /** The GVM handle. (64K VMs) */
210 uint32_t hGVM : 16;
211 /** Reserved. */
212 uint32_t u16Reserved : 14;
213 /** The page state. */
214 uint32_t u2State : 2;
215 } Private;
216
217 /** The view of a shared page. */
218 struct GMMPAGESHARED
219 {
220 /** The reference count. */
221 uint32_t cRefs;
222 /** Reserved. Checksum or something? Two hGVMs for forking? */
223 uint32_t u30Reserved : 30;
224 /** The page state. */
225 uint32_t u2State : 2;
226 } Shared;
227
228 /** The view of a free page. */
229 struct GMMPAGEFREE
230 {
231 /** The index of the next page in the free list. UINT16_MAX is NIL. */
232 uint16_t iNext;
233 /** Reserved. Checksum or something? */
234 uint16_t u16Reserved0;
235 /** Reserved. Checksum or something? */
236 uint32_t u30Reserved1 : 30;
237 /** The page state. */
238 uint32_t u2State : 2;
239 } Free;
240
241#else /* 32-bit */
242 /** Unsigned integer view. */
243 uint32_t u;
244
245 /** The common view. */
246 struct GMMPAGECOMMON
247 {
248 uint32_t uStuff : 30;
249 /** The page state. */
250 uint32_t u2State : 2;
251 } Common;
252
253 /** The view of a private page. */
254 struct GMMPAGEPRIVATE
255 {
256 /** The guest page frame number. (Max addressable: 2 ^ 36) */
257 uint32_t pfn : 24;
258 /** The GVM handle. (127 VMs) */
259 uint32_t hGVM : 7;
260 /** The top page state bit, MBZ. */
261 uint32_t fZero : 1;
262 } Private;
263
264 /** The view of a shared page. */
265 struct GMMPAGESHARED
266 {
267 /** The reference count. */
268 uint32_t cRefs : 30;
269 /** The page state. */
270 uint32_t u2State : 2;
271 } Shared;
272
273 /** The view of a free page. */
274 struct GMMPAGEFREE
275 {
276 /** The index of the next page in the free list. UINT16_MAX is NIL. */
277 uint32_t iNext : 16;
278 /** Reserved. Checksum or something? */
279 uint32_t u14Reserved : 14;
280 /** The page state. */
281 uint32_t u2State : 2;
282 } Free;
283#endif
284} GMMPAGE;
285AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
286/** Pointer to a GMMPAGE. */
287typedef GMMPAGE *PGMMPAGE;
288
289
290/** @name The Page States.
291 * @{ */
292/** A private page. */
293#define GMM_PAGE_STATE_PRIVATE 0
294/** A private page - alternative value used on the 32-bit implementation.
295 * This will never be used on 64-bit hosts. */
296#define GMM_PAGE_STATE_PRIVATE_32 1
297/** A shared page. */
298#define GMM_PAGE_STATE_SHARED 2
299/** A free page. */
300#define GMM_PAGE_STATE_FREE 3
301/** @} */
302
303
304/** @def GMM_PAGE_IS_PRIVATE
305 *
306 * @returns true if private, false if not.
307 * @param pPage The GMM page.
308 */
309#if HC_ARCH_BITS == 64
310# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
311#else
312# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
313#endif
314
315/** @def GMM_PAGE_IS_SHARED
316 *
317 * @returns true if shared, false if not.
318 * @param pPage The GMM page.
319 */
320#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
321
322/** @def GMM_PAGE_IS_FREE
323 *
324 * @returns true if free, false if not.
325 * @param pPage The GMM page.
326 */
327#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
328
329/** @def GMM_PAGE_PFN_LAST
330 * The last valid guest pfn range.
331 * @remark Some of the values outside the range have special meanings,
332 * see GMM_PAGE_PFN_UNSHAREABLE.
333 */
334#if HC_ARCH_BITS == 64
335# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
336#else
337# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
338#endif
339AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
340
341/** @def GMM_PAGE_PFN_UNSHAREABLE
342 * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
343 */
344#if HC_ARCH_BITS == 64
345# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
346#else
347# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
348#endif
349AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
350
351
352/**
353 * A GMM allocation chunk ring-3 mapping record.
354 *
355 * This should really be associated with a session and not a VM, but
356 * it's simpler to associate it with a VM and clean up when the VM object
357 * is destroyed.
358 */
359typedef struct GMMCHUNKMAP
360{
361 /** The mapping object. */
362 RTR0MEMOBJ MapObj;
363 /** The VM owning the mapping. */
364 PGVM pGVM;
365} GMMCHUNKMAP;
366/** Pointer to a GMM allocation chunk mapping. */
367typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
368
369
370/**
371 * A GMM allocation chunk.
372 */
373typedef struct GMMCHUNK
374{
375 /** The AVL node core.
376 * The Key is the chunk ID. */
377 AVLU32NODECORE Core;
378 /** The memory object.
379 * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
380 * what the host can dish up. */
381 RTR0MEMOBJ MemObj;
382 /** Pointer to the next chunk in the free list. */
383 PGMMCHUNK pFreeNext;
384 /** Pointer to the previous chunk in the free list. */
385 PGMMCHUNK pFreePrev;
386 /** Pointer to the free set this chunk belongs to. NULL for
387 * chunks with no free pages. */
388 PGMMCHUNKFREESET pSet;
389 /** Pointer to an array of mappings. */
390 PGMMCHUNKMAP paMappings;
391 /** The number of mappings. */
392 uint16_t cMappings;
393 /** The head of the list of free pages. UINT16_MAX is the NIL value. */
394 uint16_t iFreeHead;
395 /** The number of free pages. */
396 uint16_t cFree;
397 /** The GVM handle of the VM that first allocated pages from this chunk, this
398 * is used as a preference when there are several chunks to choose from.
399 * When in bound memory mode this isn't a preference any longer. */
400 uint16_t hGVM;
401 /** The number of private pages. */
402 uint16_t cPrivate;
403 /** The number of shared pages. */
404 uint16_t cShared;
405#if HC_ARCH_BITS == 64
406 /** Reserved for later. */
407 uint16_t au16Reserved[2];
408#endif
409 /** The pages. */
410 GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
411} GMMCHUNK;
412
413
414/**
415 * An allocation chunk TLB entry.
416 */
417typedef struct GMMCHUNKTLBE
418{
419 /** The chunk id. */
420 uint32_t idChunk;
421 /** Pointer to the chunk. */
422 PGMMCHUNK pChunk;
423} GMMCHUNKTLBE;
424/** Pointer to an allocation chunk TLB entry. */
425typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
426
427
428/** The number of entries in the allocation chunk TLB. */
429#define GMM_CHUNKTLB_ENTRIES 32
430/** Gets the TLB entry index for the given Chunk ID. */
431#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
432
433/**
434 * An allocation chunk TLB.
435 */
436typedef struct GMMCHUNKTLB
437{
438 /** The TLB entries. */
439 GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
440} GMMCHUNKTLB;
441/** Pointer to an allocation chunk TLB. */
442typedef GMMCHUNKTLB *PGMMCHUNKTLB;
443
444
445/** The GMMCHUNK::cFree shift count. */
446#define GMM_CHUNK_FREE_SET_SHIFT 4
447/** The GMMCHUNK::cFree mask for use when considering relinking a chunk. */
448#define GMM_CHUNK_FREE_SET_MASK 15
449/** The number of lists in a set. */
450#define GMM_CHUNK_FREE_SET_LISTS (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
451
452/**
453 * A set of free chunks.
454 */
455typedef struct GMMCHUNKFREESET
456{
457 /** The number of free pages in the set. */
458 uint64_t cPages;
459 /** Chunks ordered by increasing number of free pages. */
460 PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_LISTS];
461} GMMCHUNKFREESET;
462
463
464/**
465 * The GMM instance data.
466 */
467typedef struct GMM
468{
469 /** Magic / eye catcher. GMM_MAGIC */
470 uint32_t u32Magic;
471 /** The fast mutex protecting the GMM.
472 * More fine grained locking can be implemented later if necessary. */
473 RTSEMFASTMUTEX Mtx;
474 /** The chunk tree. */
475 PAVLU32NODECORE pChunks;
476 /** The chunk TLB. */
477 GMMCHUNKTLB ChunkTLB;
478 /** The private free set. */
479 GMMCHUNKFREESET Private;
480 /** The shared free set. */
481 GMMCHUNKFREESET Shared;
482
483 /** The maximum number of pages we're allowed to allocate.
484 * @gcfgm 64-bit GMM/MaxPages Direct.
485 * @gcfgm 32-bit GMM/PctPages Relative to the number of host pages. */
486 uint64_t cMaxPages;
487 /** The number of pages that have been reserved.
488 * The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
489 uint64_t cReservedPages;
490 /** The number of pages that we have over-committed in reservations. */
491 uint64_t cOverCommittedPages;
492 /** The number of actually allocated (committed if you like) pages. */
493 uint64_t cAllocatedPages;
494 /** The number of pages that are shared. A subset of cAllocatedPages. */
495 uint64_t cSharedPages;
496 /** The number of shared pages that have been left behind by
497 * VMs not doing proper cleanups. */
498 uint64_t cLeftBehindSharedPages;
499 /** The number of allocation chunks.
500 * (The number of pages we've allocated from the host can be derived from this.) */
501 uint32_t cChunks;
502 /** The number of current ballooned pages. */
503 uint64_t cBalloonedPages;
504
505 /** The legacy allocation mode indicator.
506 * This is determined at initialization time. */
507 bool fLegacyAllocationMode;
508 /** The bound memory mode indicator.
509 * When set, the memory will be bound to a specific VM and never
510 * shared. This is always set if fLegacyAllocationMode is set.
511 * (Also determined at initialization time.) */
512 bool fBoundMemoryMode;
513 /** The number of registered VMs. */
514 uint16_t cRegisteredVMs;
515
516 /** The previously allocated Chunk ID.
517 * Used as a hint to avoid scanning the whole bitmap. */
518 uint32_t idChunkPrev;
519 /** Chunk ID allocation bitmap.
520 * Bits of allocated IDs are set, free ones are clear.
521 * The NIL id (0) is marked allocated. */
522 uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
523} GMM;
524/** Pointer to the GMM instance. */
525typedef GMM *PGMM;
526
527/** The value of GMM::u32Magic (Katsuhiro Otomo). */
528#define GMM_MAGIC 0x19540414
529
530
531/*******************************************************************************
532* Global Variables *
533*******************************************************************************/
534/** Pointer to the GMM instance data. */
535static PGMM g_pGMM = NULL;
536
537/** Macro for obtaining and validating the g_pGMM pointer.
538 * On failure it will return from the invoking function with the specified return value.
539 *
540 * @param pGMM The name of the pGMM variable.
541 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
542 * VBox status codes.
543 */
544#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
545 do { \
546 (pGMM) = g_pGMM; \
547 AssertPtrReturn((pGMM), (rc)); \
548 AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
549 } while (0)
550
551/** Macro for obtaining and validating the g_pGMM pointer, void function variant.
552 * On failure it will return from the invoking function.
553 *
554 * @param pGMM The name of the pGMM variable.
555 */
556#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
557 do { \
558 (pGMM) = g_pGMM; \
559 AssertPtrReturnVoid((pGMM)); \
560 AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
561 } while (0)
562
563
564/*******************************************************************************
565* Internal Functions *
566*******************************************************************************/
567static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
568static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
569/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
570DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
571DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
572static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
573static void gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
574static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
575
576
577
578/**
579 * Initializes the GMM component.
580 *
581 * This is called when the VMMR0.r0 module is loaded and protected by the
582 * loader semaphore.
583 *
584 * @returns VBox status code.
585 */
586GMMR0DECL(int) GMMR0Init(void)
587{
588 LogFlow(("GMMInit:\n"));
589
590 /*
591 * Allocate the instance data and the lock(s).
592 */
593 PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
594 if (!pGMM)
595 return VERR_NO_MEMORY;
596 pGMM->u32Magic = GMM_MAGIC;
597 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
598 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
599 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
600
601 int rc = RTSemFastMutexCreate(&pGMM->Mtx);
602 if (RT_SUCCESS(rc))
603 {
604 /*
605 * Check and see if RTR0MemObjAllocPhysNC works.
606 */
607#if 0 /* later, see #3170. */
608 RTR0MEMOBJ MemObj;
609 rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
610 if (RT_SUCCESS(rc))
611 {
612 rc = RTR0MemObjFree(MemObj, true);
613 AssertRC(rc);
614 }
615 else if (rc == VERR_NOT_SUPPORTED)
616 pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
617 else
618 SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
619#else
620# ifdef RT_OS_WINDOWS
621 pGMM->fLegacyAllocationMode = false;
622# else
623 pGMM->fLegacyAllocationMode = true;
624# endif
625 pGMM->fBoundMemoryMode = true;
626#endif
627
628 /*
629 * Query system page count and guess a reasonable cMaxPages value.
630 */
631 pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
632
633 g_pGMM = pGMM;
634 LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
635 return VINF_SUCCESS;
636 }
637
638 RTMemFree(pGMM);
639 SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
640 return rc;
641}
642
643
644/**
645 * Terminates the GMM component.
646 */
647GMMR0DECL(void) GMMR0Term(void)
648{
649 LogFlow(("GMMTerm:\n"));
650
651 /*
652 * Take care / be paranoid...
653 */
654 PGMM pGMM = g_pGMM;
655 if (!VALID_PTR(pGMM))
656 return;
657 if (pGMM->u32Magic != GMM_MAGIC)
658 {
659 SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
660 return;
661 }
662
663 /*
664 * Undo what init did and free all the resources we've acquired.
665 */
666 /* Destroy the fundamentals. */
667 g_pGMM = NULL;
668 pGMM->u32Magic++;
669 RTSemFastMutexDestroy(pGMM->Mtx);
670 pGMM->Mtx = NIL_RTSEMFASTMUTEX;
671
672 /* free any chunks still hanging around. */
673 RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
674
675 /* finally the instance data itself. */
676 RTMemFree(pGMM);
677 LogFlow(("GMMTerm: done\n"));
678}
679
680
681/**
682 * RTAvlU32Destroy callback.
683 *
684 * @returns 0
685 * @param pNode The node to destroy.
686 * @param pvGMM The GMM handle.
687 */
688static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
689{
690 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
691
692 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
693 SUPR0Printf("GMMR0Term: %p/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
694 pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappings);
695
696 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
697 if (RT_FAILURE(rc))
698 {
699 SUPR0Printf("GMMR0Term: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
700 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
701 AssertRC(rc);
702 }
703 pChunk->MemObj = NIL_RTR0MEMOBJ;
704
705 RTMemFree(pChunk->paMappings);
706 pChunk->paMappings = NULL;
707
708 RTMemFree(pChunk);
709 NOREF(pvGMM);
710 return 0;
711}
712
713
714/**
715 * Initializes the per-VM data for the GMM.
716 *
717 * This is called from within the GVMM lock (from GVMMR0CreateVM)
718 * and should only initialize the data members so GMMR0CleanupVM
719 * can deal with them. We reserve no memory or anything here,
720 * that's done later in GMMR0InitVM.
721 *
722 * @param pGVM Pointer to the Global VM structure.
723 */
724GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
725{
726 AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
727 AssertRelease(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
728
729 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
730 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
731 pGVM->gmm.s.fMayAllocate = false;
732}
733
734
735/**
736 * Cleans up when a VM is terminating.
737 *
738 * @param pGVM Pointer to the Global VM structure.
739 */
740GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
741{
742 LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
743
744 PGMM pGMM;
745 GMM_GET_VALID_INSTANCE_VOID(pGMM);
746
747 int rc = RTSemFastMutexRequest(pGMM->Mtx);
748 AssertRC(rc);
749
750 /*
751 * The policy is 'INVALID' until the initial reservation
752 * request has been serviced.
753 */
754 if ( pGVM->gmm.s.enmPolicy > GMMOCPOLICY_INVALID
755 && pGVM->gmm.s.enmPolicy < GMMOCPOLICY_END)
756 {
757 /*
758 * If it's the last VM around, we can skip walking all the chunk looking
759 * for the pages owned by this VM and instead flush the whole shebang.
760 *
761 * This takes care of the eventuality that a VM has left shared page
762 * references behind (shouldn't happen of course, but you never know).
763 */
764 Assert(pGMM->cRegisteredVMs);
765 pGMM->cRegisteredVMs--;
766#if 0 /* disabled so it won't hide bugs. */
767 if (!pGMM->cRegisteredVMs)
768 {
769 RTAvlU32Destroy(&pGMM->pChunks, gmmR0CleanupVMDestroyChunk, pGMM);
770
771 for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
772 {
773 pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
774 pGMM->ChunkTLB.aEntries[i].pChunk = NULL;
775 }
776
777 memset(&pGMM->Private, 0, sizeof(pGMM->Private));
778 memset(&pGMM->Shared, 0, sizeof(pGMM->Shared));
779
780 memset(&pGMM->bmChunkId[0], 0, sizeof(pGMM->bmChunkId));
781 ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
782
783 pGMM->cReservedPages = 0;
784 pGMM->cOverCommittedPages = 0;
785 pGMM->cAllocatedPages = 0;
786 pGMM->cSharedPages = 0;
787 pGMM->cLeftBehindSharedPages = 0;
788 pGMM->cChunks = 0;
789 pGMM->cBalloonedPages = 0;
790 }
791 else
792#endif
793 {
794 /*
795 * Walk the entire pool looking for pages that belongs to this VM
796 * and left over mappings. (This'll only catch private pages, shared
797 * pages will be 'left behind'.)
798 */
799 uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
800 RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
801 if (pGVM->gmm.s.cPrivatePages)
802 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
803 pGMM->cAllocatedPages -= cPrivatePages;
804
805 /* free empty chunks. */
806 if (cPrivatePages)
807 {
808 PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
809 while (pCur)
810 {
811 PGMMCHUNK pNext = pCur->pFreeNext;
812 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
813 && ( !pGMM->fBoundMemoryMode
814 || pCur->hGVM == pGVM->hSelf))
815 gmmR0FreeChunk(pGMM, pGVM, pCur);
816 pCur = pNext;
817 }
818 }
819
820 /* account for shared pages that weren't freed. */
821 if (pGVM->gmm.s.cSharedPages)
822 {
823 Assert(pGMM->cSharedPages >= pGVM->gmm.s.cSharedPages);
824 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.cSharedPages);
825 pGMM->cLeftBehindSharedPages += pGVM->gmm.s.cSharedPages;
826 }
827
828 /*
829 * Update the over-commitment management statistics.
830 */
831 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
832 + pGVM->gmm.s.Reserved.cFixedPages
833 + pGVM->gmm.s.Reserved.cShadowPages;
834 switch (pGVM->gmm.s.enmPolicy)
835 {
836 case GMMOCPOLICY_NO_OC:
837 break;
838 default:
839 /** @todo Update GMM->cOverCommittedPages */
840 break;
841 }
842 }
843 }
844
845 /* zap the GVM data. */
846 pGVM->gmm.s.enmPolicy = GMMOCPOLICY_INVALID;
847 pGVM->gmm.s.enmPriority = GMMPRIORITY_INVALID;
848 pGVM->gmm.s.fMayAllocate = false;
849
850 RTSemFastMutexRelease(pGMM->Mtx);
851
852 LogFlow(("GMMR0CleanupVM: returns\n"));
853}
854
855
856/**
857 * RTAvlU32DoWithAll callback.
858 *
859 * @returns 0
860 * @param pNode The node to search.
861 * @param pvGVM Pointer to the shared VM structure.
862 */
863static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
864{
865 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
866 PGVM pGVM = (PGVM)pvGVM;
867
868 /*
869 * Look for pages belonging to the VM.
870 * (Perform some internal checks while we're scanning.)
871 */
872#ifndef VBOX_STRICT
873 if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
874#endif
875 {
876 unsigned cPrivate = 0;
877 unsigned cShared = 0;
878 unsigned cFree = 0;
879
880 uint16_t hGVM = pGVM->hSelf;
881 unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
882 while (iPage-- > 0)
883 if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
884 {
885 if (pChunk->aPages[iPage].Private.hGVM == hGVM)
886 {
887 /*
888 * Free the page.
889 *
890 * The reason for not using gmmR0FreePrivatePage here is that we
891 * must *not* cause the chunk to be freed from under us - we're in
892 * a AVL tree walk here.
893 */
894 pChunk->aPages[iPage].u = 0;
895 pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
896 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
897 pChunk->iFreeHead = iPage;
898 pChunk->cPrivate--;
899 if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
900 {
901 gmmR0UnlinkChunk(pChunk);
902 pChunk->cFree++;
903 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
904 }
905 else
906 pChunk->cFree++;
907 pGVM->gmm.s.cPrivatePages--;
908 cFree++;
909 }
910 else
911 cPrivate++;
912 }
913 else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
914 cFree++;
915 else
916 cShared++;
917
918 /*
919 * Did it add up?
920 */
921 if (RT_UNLIKELY( pChunk->cFree != cFree
922 || pChunk->cPrivate != cPrivate
923 || pChunk->cShared != cShared))
924 {
925 SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %p/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
926 pChunk, pChunk->Core.Key, pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
927 pChunk->cFree = cFree;
928 pChunk->cPrivate = cPrivate;
929 pChunk->cShared = cShared;
930 }
931 }
932
933 /*
934 * Look for the mapping belonging to the terminating VM.
935 */
936 for (unsigned i = 0; i < pChunk->cMappings; i++)
937 if (pChunk->paMappings[i].pGVM == pGVM)
938 {
939 RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
940
941 pChunk->cMappings--;
942 if (i < pChunk->cMappings)
943 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
944 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
945 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
946
947 int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
948 if (RT_FAILURE(rc))
949 {
950 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n",
951 pChunk, pChunk->Core.Key, i, MemObj, rc);
952 AssertRC(rc);
953 }
954 break;
955 }
956
957 /*
958 * If not in bound memory mode, we should reset the hGVM field
959 * if it has our handle in it.
960 */
961 if (pChunk->hGVM == pGVM->hSelf)
962 {
963 if (!g_pGMM->fBoundMemoryMode)
964 pChunk->hGVM = NIL_GVM_HANDLE;
965 else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
966 {
967 SUPR0Printf("gmmR0CleanupVMScanChunk: %p/%#x: cFree=%#x - it should be 0 in bound mode!\n",
968 pChunk, pChunk->Core.Key, pChunk->cFree);
969 AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
970
971 gmmR0UnlinkChunk(pChunk);
972 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
973 gmmR0LinkChunk(pChunk, pChunk->cShared ? &g_pGMM->Shared : &g_pGMM->Private);
974 }
975 }
976
977 return 0;
978}
979
980
981/**
982 * RTAvlU32Destroy callback for GMMR0CleanupVM.
983 *
984 * @returns 0
985 * @param pNode The node (allocation chunk) to destroy.
986 * @param pvGVM Pointer to the shared VM structure.
987 */
988/*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM)
989{
990 PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
991 PGVM pGVM = (PGVM)pvGVM;
992
993 for (unsigned i = 0; i < pChunk->cMappings; i++)
994 {
995 if (pChunk->paMappings[i].pGVM != pGVM)
996 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: pGVM=%p exepcted %p\n", pChunk,
997 pChunk->Core.Key, i, pChunk->paMappings[i].pGVM, pGVM);
998 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
999 if (RT_FAILURE(rc))
1000 {
1001 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: mapping #%x: RTRMemObjFree(%p,false) -> %d \n", pChunk,
1002 pChunk->Core.Key, i, pChunk->paMappings[i].MapObj, rc);
1003 AssertRC(rc);
1004 }
1005 }
1006
1007 int rc = RTR0MemObjFree(pChunk->MemObj, true /* fFreeMappings */);
1008 if (RT_FAILURE(rc))
1009 {
1010 SUPR0Printf("gmmR0CleanupVMDestroyChunk: %p/%#x: RTRMemObjFree(%p,true) -> %d (cMappings=%d)\n", pChunk,
1011 pChunk->Core.Key, pChunk->MemObj, rc, pChunk->cMappings);
1012 AssertRC(rc);
1013 }
1014 pChunk->MemObj = NIL_RTR0MEMOBJ;
1015
1016 RTMemFree(pChunk->paMappings);
1017 pChunk->paMappings = NULL;
1018
1019 RTMemFree(pChunk);
1020 return 0;
1021}
1022
1023
1024/**
1025 * The initial resource reservations.
1026 *
1027 * This will make memory reservations according to policy and priority. If there aren't
1028 * sufficient resources available to sustain the VM this function will fail and all
1029 * future allocation requests will fail as well.
1030 *
1031 * These are just the initial reservations made very early during the VM creation
1032 * process and will be adjusted later in the GMMR0UpdateReservation call after the
1033 * ring-3 init has completed.
1034 *
1035 * @returns VBox status code.
1036 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1037 * @retval VERR_GMM_
1038 *
1039 * @param pVM Pointer to the shared VM structure.
1040 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1041 * This does not include MMIO2 and similar.
1042 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1043 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1044 * hyper heap, MMIO2 and similar.
1045 * @param enmPolicy The OC policy to use on this VM.
1046 * @param enmPriority The priority in an out-of-memory situation.
1047 *
1048 * @thread The creator thread / EMT.
1049 */
1050GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages,
1051 GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
1052{
1053 LogFlow(("GMMR0InitialReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
1054 pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
1055
1056 /*
1057 * Validate, get basics and take the semaphore.
1058 */
1059 PGMM pGMM;
1060 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1061 PGVM pGVM = GVMMR0ByVM(pVM);
1062 if (!pGVM)
1063 return VERR_INVALID_PARAMETER;
1064 if (pGVM->hEMT != RTThreadNativeSelf())
1065 return VERR_NOT_OWNER;
1066
1067 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1068 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1069 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1070 AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
1071 AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
1072
1073 int rc = RTSemFastMutexRequest(pGMM->Mtx);
1074 AssertRC(rc);
1075
1076 if ( !pGVM->gmm.s.Reserved.cBasePages
1077 && !pGVM->gmm.s.Reserved.cFixedPages
1078 && !pGVM->gmm.s.Reserved.cShadowPages)
1079 {
1080 /*
1081 * Check if we can accommodate this.
1082 */
1083 /* ... later ... */
1084 if (RT_SUCCESS(rc))
1085 {
1086 /*
1087 * Update the records.
1088 */
1089 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1090 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1091 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1092 pGVM->gmm.s.enmPolicy = enmPolicy;
1093 pGVM->gmm.s.enmPriority = enmPriority;
1094 pGVM->gmm.s.fMayAllocate = true;
1095
1096 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1097 pGMM->cRegisteredVMs++;
1098 }
1099 }
1100 else
1101 rc = VERR_WRONG_ORDER;
1102
1103 RTSemFastMutexRelease(pGMM->Mtx);
1104 LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
1105 return rc;
1106}
1107
1108
1109/**
1110 * VMMR0 request wrapper for GMMR0InitialReservation.
1111 *
1112 * @returns see GMMR0InitialReservation.
1113 * @param pVM Pointer to the shared VM structure.
1114 * @param pReq The request packet.
1115 */
1116GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, PGMMINITIALRESERVATIONREQ pReq)
1117{
1118 /*
1119 * Validate input and pass it on.
1120 */
1121 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1122 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1123 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1124
1125 return GMMR0InitialReservation(pVM, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
1126}
1127
1128
1129/**
1130 * This updates the memory reservation with the additional MMIO2 and ROM pages.
1131 *
1132 * @returns VBox status code.
1133 * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
1134 *
1135 * @param pVM Pointer to the shared VM structure.
1136 * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
1137 * This does not include MMIO2 and similar.
1138 * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
1139 * @param cFixedPages The number of pages that may be allocated for fixed objects like the
1140 * hyper heap, MMIO2 and similar.
1141 *
1142 * @thread EMT.
1143 */
1144GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, uint64_t cBasePages, uint32_t cShadowPages, uint32_t cFixedPages)
1145{
1146 LogFlow(("GMMR0UpdateReservation: pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
1147 pVM, cBasePages, cShadowPages, cFixedPages));
1148
1149 /*
1150 * Validate, get basics and take the semaphore.
1151 */
1152 PGMM pGMM;
1153 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1154 PGVM pGVM = GVMMR0ByVM(pVM);
1155 if (!pGVM)
1156 return VERR_INVALID_PARAMETER;
1157 if (pGVM->hEMT != RTThreadNativeSelf())
1158 return VERR_NOT_OWNER;
1159
1160 AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
1161 AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
1162 AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
1163
1164 int rc = RTSemFastMutexRequest(pGMM->Mtx);
1165 AssertRC(rc);
1166
1167 if ( pGVM->gmm.s.Reserved.cBasePages
1168 && pGVM->gmm.s.Reserved.cFixedPages
1169 && pGVM->gmm.s.Reserved.cShadowPages)
1170 {
1171 /*
1172 * Check if we can accommodate this.
1173 */
1174 /* ... later ... */
1175 if (RT_SUCCESS(rc))
1176 {
1177 /*
1178 * Update the records.
1179 */
1180 pGMM->cReservedPages -= pGVM->gmm.s.Reserved.cBasePages
1181 + pGVM->gmm.s.Reserved.cFixedPages
1182 + pGVM->gmm.s.Reserved.cShadowPages;
1183 pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
1184
1185 pGVM->gmm.s.Reserved.cBasePages = cBasePages;
1186 pGVM->gmm.s.Reserved.cFixedPages = cFixedPages;
1187 pGVM->gmm.s.Reserved.cShadowPages = cShadowPages;
1188 }
1189 }
1190 else
1191 rc = VERR_WRONG_ORDER;
1192
1193 RTSemFastMutexRelease(pGMM->Mtx);
1194 LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
1195 return rc;
1196}
1197
1198
1199/**
1200 * VMMR0 request wrapper for GMMR0UpdateReservation.
1201 *
1202 * @returns see GMMR0UpdateReservation.
1203 * @param pVM Pointer to the shared VM structure.
1204 * @param pReq The request packet.
1205 */
1206GMMR0DECL(int) GMMR0UpdateReservationReq(PVM pVM, PGMMUPDATERESERVATIONREQ pReq)
1207{
1208 /*
1209 * Validate input and pass it on.
1210 */
1211 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1212 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
1213 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
1214
1215 return GMMR0UpdateReservation(pVM, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
1216}
1217
1218
1219/**
1220 * Looks up a chunk in the tree and fills in the TLB entry for it.
1221 *
1222 * This is not expected to fail and will bitch if it does.
1223 *
1224 * @returns Pointer to the allocation chunk, NULL if not found.
1225 * @param pGMM Pointer to the GMM instance.
1226 * @param idChunk The ID of the chunk to find.
1227 * @param pTlbe Pointer to the TLB entry.
1228 */
1229static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
1230{
1231 PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
1232 AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
1233 pTlbe->idChunk = idChunk;
1234 pTlbe->pChunk = pChunk;
1235 return pChunk;
1236}
1237
1238
1239/**
1240 * Finds an allocation chunk.
1241 *
1242 * This is not expected to fail and will bitch if it does.
1243 *
1244 * @returns Pointer to the allocation chunk, NULL if not found.
1245 * @param pGMM Pointer to the GMM instance.
1246 * @param idChunk The ID of the chunk to find.
1247 */
1248DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
1249{
1250 /*
1251 * Do a TLB lookup, branch if not in the TLB.
1252 */
1253 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
1254 if ( pTlbe->idChunk != idChunk
1255 || !pTlbe->pChunk)
1256 return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
1257 return pTlbe->pChunk;
1258}
1259
1260
1261/**
1262 * Finds a page.
1263 *
1264 * This is not expected to fail and will bitch if it does.
1265 *
1266 * @returns Pointer to the page, NULL if not found.
1267 * @param pGMM Pointer to the GMM instance.
1268 * @param idPage The ID of the page to find.
1269 */
1270DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
1271{
1272 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
1273 if (RT_LIKELY(pChunk))
1274 return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
1275 return NULL;
1276}
1277
1278
1279/**
1280 * Unlinks the chunk from the free list it's currently on (if any).
1281 *
1282 * @param pChunk The allocation chunk.
1283 */
1284DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
1285{
1286 PGMMCHUNKFREESET pSet = pChunk->pSet;
1287 if (RT_LIKELY(pSet))
1288 {
1289 pSet->cPages -= pChunk->cFree;
1290
1291 PGMMCHUNK pPrev = pChunk->pFreePrev;
1292 PGMMCHUNK pNext = pChunk->pFreeNext;
1293 if (pPrev)
1294 pPrev->pFreeNext = pNext;
1295 else
1296 pSet->apLists[(pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT] = pNext;
1297 if (pNext)
1298 pNext->pFreePrev = pPrev;
1299
1300 pChunk->pSet = NULL;
1301 pChunk->pFreeNext = NULL;
1302 pChunk->pFreePrev = NULL;
1303 }
1304 else
1305 {
1306 Assert(!pChunk->pFreeNext);
1307 Assert(!pChunk->pFreePrev);
1308 Assert(!pChunk->cFree);
1309 }
1310}
1311
1312
1313/**
1314 * Links the chunk onto the appropriate free list in the specified free set.
1315 *
1316 * If no free entries, it's not linked into any list.
1317 *
1318 * @param pChunk The allocation chunk.
1319 * @param pSet The free set.
1320 */
1321DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
1322{
1323 Assert(!pChunk->pSet);
1324 Assert(!pChunk->pFreeNext);
1325 Assert(!pChunk->pFreePrev);
1326
1327 if (pChunk->cFree > 0)
1328 {
1329 pChunk->pSet = pSet;
1330 pChunk->pFreePrev = NULL;
1331 unsigned iList = (pChunk->cFree - 1) >> GMM_CHUNK_FREE_SET_SHIFT;
1332 pChunk->pFreeNext = pSet->apLists[iList];
1333 if (pChunk->pFreeNext)
1334 pChunk->pFreeNext->pFreePrev = pChunk;
1335 pSet->apLists[iList] = pChunk;
1336
1337 pSet->cPages += pChunk->cFree;
1338 }
1339}
1340
1341
1342/**
1343 * Frees a Chunk ID.
1344 *
1345 * @param pGMM Pointer to the GMM instance.
1346 * @param idChunk The Chunk ID to free.
1347 */
1348static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
1349{
1350 AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
1351 AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
1352 ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
1353}
1354
1355
1356/**
1357 * Allocates a new Chunk ID.
1358 *
1359 * @returns The Chunk ID.
1360 * @param pGMM Pointer to the GMM instance.
1361 */
1362static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
1363{
1364 AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
1365 AssertCompile(NIL_GMM_CHUNKID == 0);
1366
1367 /*
1368 * Try the next sequential one.
1369 */
1370 int32_t idChunk = ++pGMM->idChunkPrev;
1371#if 0 /* test the fallback first */
1372 if ( idChunk <= GMM_CHUNKID_LAST
1373 && idChunk > NIL_GMM_CHUNKID
1374 && !ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk))
1375 return idChunk;
1376#endif
1377
1378 /*
1379 * Scan sequentially from the last one.
1380 */
1381 if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
1382 && idChunk > NIL_GMM_CHUNKID)
1383 {
1384 idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk);
1385 if (idChunk > NIL_GMM_CHUNKID)
1386 {
1387 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1388 return pGMM->idChunkPrev = idChunk;
1389 }
1390 }
1391
1392 /*
1393 * Ok, scan from the start.
1394 * We're not racing anyone, so there is no need to expect failures or have restart loops.
1395 */
1396 idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
1397 AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
1398 AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
1399
1400 return pGMM->idChunkPrev = idChunk;
1401}
1402
1403
1404/**
1405 * Registers a new chunk of memory.
1406 *
1407 * This is called by both gmmR0AllocateOneChunk and GMMR0SeedChunk. Will take
1408 * the mutex; the caller must not own it.
1409 *
1410 * @returns VBox status code.
1411 * @param pGMM Pointer to the GMM instance.
1412 * @param pSet Pointer to the set.
1413 * @param MemObj The memory object for the chunk.
1414 * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
1415 * affinity.
1416 */
1417static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM)
1418{
1419 Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
1420
1421 int rc;
1422 PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
1423 if (pChunk)
1424 {
1425 /*
1426 * Initialize it.
1427 */
1428 pChunk->MemObj = MemObj;
1429 pChunk->cFree = GMM_CHUNK_NUM_PAGES;
1430 pChunk->hGVM = hGVM;
1431 pChunk->iFreeHead = 0;
1432 for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
1433 {
1434 pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
1435 pChunk->aPages[iPage].Free.iNext = iPage + 1;
1436 }
1437 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
1438 pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
1439
1440 /*
1441 * Allocate a Chunk ID and insert it into the tree.
1442 * This has to be done behind the mutex of course.
1443 */
1444 rc = RTSemFastMutexRequest(pGMM->Mtx);
1445 if (RT_SUCCESS(rc))
1446 {
1447 pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
1448 if ( pChunk->Core.Key != NIL_GMM_CHUNKID
1449 && pChunk->Core.Key <= GMM_CHUNKID_LAST
1450 && RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
1451 {
1452 pGMM->cChunks++;
1453 gmmR0LinkChunk(pChunk, pSet);
1454 LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
1455 RTSemFastMutexRelease(pGMM->Mtx);
1456 return VINF_SUCCESS;
1457 }
1458
1459 /* bail out */
1460 rc = VERR_INTERNAL_ERROR;
1461 RTSemFastMutexRelease(pGMM->Mtx);
1462 }
1463 RTMemFree(pChunk);
1464 }
1465 else
1466 rc = VERR_NO_MEMORY;
1467 return rc;
1468}
1469
1470
1471/**
1472 * Allocate one new chunk and add it to the specified free set.
1473 *
1474 * @returns VBox status code.
1475 * @param pGMM Pointer to the GMM instance.
1476 * @param pSet Pointer to the set.
1477 * @param hGVM The affinity of the new chunk.
1478 *
1479 * @remarks Called without owning the mutex.
1480 */
1481static int gmmR0AllocateOneChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, uint16_t hGVM)
1482{
1483 /*
1484 * Allocate the memory.
1485 */
1486 RTR0MEMOBJ MemObj;
1487 int rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
1488 if (RT_SUCCESS(rc))
1489 {
1490 rc = gmmR0RegisterChunk(pGMM, pSet, MemObj, hGVM);
1491 if (RT_FAILURE(rc))
1492 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
1493 }
1494 /** @todo Check that RTR0MemObjAllocPhysNC always returns VERR_NO_MEMORY on
1495 * allocation failure. */
1496 return rc;
1497}
1498
1499
1500/**
1501 * Attempts to allocate more pages until the requested amount is met.
1502 *
1503 * @returns VBox status code.
1504 * @param pGMM Pointer to the GMM instance data.
1505 * @param pGVM The calling VM.
1506 * @param pSet Pointer to the free set to grow.
1507 * @param cPages The number of pages needed.
1508 *
1509 * @remarks Called owning the mutex, but will leave it temporarily while
1510 * allocating the memory!
1511 */
1512static int gmmR0AllocateMoreChunks(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages)
1513{
1514 Assert(!pGMM->fLegacyAllocationMode);
1515
1516 if (!pGMM->fBoundMemoryMode)
1517 {
1518 /*
1519 * Try to steal free chunks from the other set first. (Only take 100% free chunks.)
1520 */
1521 PGMMCHUNKFREESET pOtherSet = pSet == &pGMM->Private ? &pGMM->Shared : &pGMM->Private;
1522 while ( pSet->cPages < cPages
1523 && pOtherSet->cPages >= GMM_CHUNK_NUM_PAGES)
1524 {
1525 PGMMCHUNK pChunk = pOtherSet->apLists[GMM_CHUNK_FREE_SET_LISTS - 1];
1526 while (pChunk && pChunk->cFree != GMM_CHUNK_NUM_PAGES)
1527 pChunk = pChunk->pFreeNext;
1528 if (!pChunk)
1529 break;
1530
1531 gmmR0UnlinkChunk(pChunk);
1532 gmmR0LinkChunk(pChunk, pSet);
1533 }
1534
1535 /*
1536 * If we need still more pages, allocate new chunks.
1537 * Note! We will leave the mutex while doing the allocation,
1538 * gmmR0AllocateOneChunk will re-take it temporarily while registering the chunk.
1539 */
1540 while (pSet->cPages < cPages)
1541 {
1542 RTSemFastMutexRelease(pGMM->Mtx);
1543 int rc = gmmR0AllocateOneChunk(pGMM, pSet, NIL_GVM_HANDLE);
1544 int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1545 AssertRCReturn(rc2, rc2);
1546 if (RT_FAILURE(rc))
1547 return rc;
1548 }
1549 }
1550 else
1551 {
1552 /*
1553 * The memory is bound to the VM allocating it, so we have to count
1554 * the free pages carefully as well as making sure we brand it
1555 * with our VM handle.
1556 *
1557 * Note! We will leave the mutex while doing the allocation,
1558 * gmmR0AllocateOneChunk will re-take it temporarily while registering the chunk.
1559 */
1560 uint16_t const hGVM = pGVM->hSelf;
1561 for (;;)
1562 {
1563 /* Count and see if we've reached the goal. */
1564 uint32_t cPagesFound = 0;
1565 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1566 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1567 if (pCur->hGVM == hGVM)
1568 {
1569 cPagesFound += pCur->cFree;
1570 if (cPagesFound >= cPages)
1571 break;
1572 }
1573 if (cPagesFound >= cPages)
1574 break;
1575
1576 /* Allocate more. */
1577 RTSemFastMutexRelease(pGMM->Mtx);
1578 int rc = gmmR0AllocateOneChunk(pGMM, pSet, hGVM);
1579 int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
1580 AssertRCReturn(rc2, rc2);
1581 if (RT_FAILURE(rc))
1582 return rc;
1583 }
1584 }
1585
1586 return VINF_SUCCESS;
1587}
1588
1589
1590/**
1591 * Allocates one private page.
1592 *
1593 * Worker for gmmR0AllocatePages.
1594 *
1595 * @param pGMM Pointer to the GMM instance data.
1596 * @param hGVM The GVM handle of the VM requesting memory.
1597 * @param pChunk The chunk to allocate it from.
1598 * @param pPageDesc The page descriptor.
1599 */
1600static void gmmR0AllocatePage(PGMM pGMM, uint32_t hGVM, PGMMCHUNK pChunk, PGMMPAGEDESC pPageDesc)
1601{
1602 /* update the chunk stats. */
1603 if (pChunk->hGVM == NIL_GVM_HANDLE)
1604 pChunk->hGVM = hGVM;
1605 Assert(pChunk->cFree);
1606 pChunk->cFree--;
1607 pChunk->cPrivate++;
1608
1609 /* unlink the first free page. */
1610 const uint32_t iPage = pChunk->iFreeHead;
1611 AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
1612 PGMMPAGE pPage = &pChunk->aPages[iPage];
1613 Assert(GMM_PAGE_IS_FREE(pPage));
1614 pChunk->iFreeHead = pPage->Free.iNext;
1615 Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
1616 pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
1617 pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
1618
1619 /* make the page private. */
1620 pPage->u = 0;
1621 AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
1622 pPage->Private.hGVM = hGVM;
1623 AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
1624 AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
1625 if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
1626 pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
1627 else
1628 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
1629
1630 /* update the page descriptor. */
1631 pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->MemObj, iPage);
1632 Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
1633 pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
1634 pPageDesc->idSharedPage = NIL_GMM_PAGEID;
1635}
1636
1637
1638/**
1639 * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
1640 *
1641 * @returns VBox status code:
1642 * @retval VINF_SUCCESS on success.
1643 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
1644 * gmmR0AllocateMoreChunks is necessary.
1645 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1646 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1647 * that is we're trying to allocate more than we've reserved.
1648 *
1649 * @param pGMM Pointer to the GMM instance data.
1650 * @param pGVM Pointer to the shared VM structure.
1651 * @param cPages The number of pages to allocate.
1652 * @param paPages Pointer to the page descriptors.
1653 * See GMMPAGEDESC for details on what is expected on input.
1654 * @param enmAccount The account to charge.
1655 */
1656static int gmmR0AllocatePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
1657{
1658 /*
1659 * Check allocation limits.
1660 */
1661 if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
1662 return VERR_GMM_HIT_GLOBAL_LIMIT;
1663
1664 switch (enmAccount)
1665 {
1666 case GMMACCOUNT_BASE:
1667 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages + cPages > pGVM->gmm.s.Reserved.cBasePages))
1668 {
1669 Log(("gmmR0AllocatePages: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1670 pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
1671 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1672 }
1673 break;
1674 case GMMACCOUNT_SHADOW:
1675 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages + cPages > pGVM->gmm.s.Reserved.cShadowPages))
1676 {
1677 Log(("gmmR0AllocatePages: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1678 pGVM->gmm.s.Reserved.cShadowPages, pGVM->gmm.s.Allocated.cShadowPages, cPages));
1679 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1680 }
1681 break;
1682 case GMMACCOUNT_FIXED:
1683 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages + cPages > pGVM->gmm.s.Reserved.cFixedPages))
1684 {
1685 Log(("gmmR0AllocatePages: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
1686 pGVM->gmm.s.Reserved.cFixedPages, pGVM->gmm.s.Allocated.cFixedPages, cPages));
1687 return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
1688 }
1689 break;
1690 default:
1691 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1692 }
1693
1694 /*
1695 * Check whether we need to allocate more memory. In bound memory mode this
1696 * is a bit of extra work, but it's easier to do it up front than to bail out later.
1697 */
1698 PGMMCHUNKFREESET pSet = &pGMM->Private;
1699 if (pSet->cPages < cPages)
1700 return VERR_GMM_SEED_ME;
1701 if (pGMM->fBoundMemoryMode)
1702 {
1703 uint16_t hGVM = pGVM->hSelf;
1704 uint32_t cPagesFound = 0;
1705 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
1706 for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
1707 if (pCur->hGVM == hGVM)
1708 {
1709 cPagesFound += pCur->cFree;
1710 if (cPagesFound >= cPages)
1711 break;
1712 }
1713 if (cPagesFound < cPages)
1714 return VERR_GMM_SEED_ME;
1715 }
1716
1717 /*
1718 * Pick the pages.
1719 * Make some effort to keep each VM allocating from its own private chunks (chunk affinity).
1720 */
1721 uint16_t hGVM = pGVM->hSelf;
1722 uint32_t iPage = 0;
1723
1724 /* first round, pick from chunks with an affinity to the VM. */
1725 for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists) && iPage < cPages; i++)
1726 {
1727 PGMMCHUNK pCurFree = NULL;
1728 PGMMCHUNK pCur = pSet->apLists[i];
1729 while (pCur && iPage < cPages)
1730 {
1731 PGMMCHUNK pNext = pCur->pFreeNext;
1732
1733 if ( pCur->hGVM == hGVM
1734 && pCur->cFree < GMM_CHUNK_NUM_PAGES)
1735 {
1736 gmmR0UnlinkChunk(pCur);
1737 for (; pCur->cFree && iPage < cPages; iPage++)
1738 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1739 gmmR0LinkChunk(pCur, pSet);
1740 }
1741
1742 pCur = pNext;
1743 }
1744 }
1745
1746 if (iPage < cPages)
1747 {
1748 /* second round, pick pages from the 100% empty chunks we just skipped above. */
1749 PGMMCHUNK pCurFree = NULL;
1750 PGMMCHUNK pCur = pSet->apLists[RT_ELEMENTS(pSet->apLists) - 1];
1751 while (pCur && iPage < cPages)
1752 {
1753 PGMMCHUNK pNext = pCur->pFreeNext;
1754
1755 if ( pCur->cFree == GMM_CHUNK_NUM_PAGES
1756 && ( pCur->hGVM == hGVM
1757 || !pGMM->fBoundMemoryMode))
1758 {
1759 gmmR0UnlinkChunk(pCur);
1760 for (; pCur->cFree && iPage < cPages; iPage++)
1761 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1762 gmmR0LinkChunk(pCur, pSet);
1763 }
1764
1765 pCur = pNext;
1766 }
1767 }
1768
1769 if ( iPage < cPages
1770 && !pGMM->fBoundMemoryMode)
1771 {
1772 /* third round, disregard affinity. */
1773 unsigned i = RT_ELEMENTS(pSet->apLists);
1774 while (i-- > 0 && iPage < cPages)
1775 {
1776 PGMMCHUNK pCurFree = NULL;
1777 PGMMCHUNK pCur = pSet->apLists[i];
1778 while (pCur && iPage < cPages)
1779 {
1780 PGMMCHUNK pNext = pCur->pFreeNext;
1781
1782 if ( pCur->cFree > GMM_CHUNK_NUM_PAGES / 2
1783 && cPages >= GMM_CHUNK_NUM_PAGES / 2)
1784 pCur->hGVM = hGVM; /* change chunk affinity */
1785
1786 gmmR0UnlinkChunk(pCur);
1787 for (; pCur->cFree && iPage < cPages; iPage++)
1788 gmmR0AllocatePage(pGMM, hGVM, pCur, &paPages[iPage]);
1789 gmmR0LinkChunk(pCur, pSet);
1790
1791 pCur = pNext;
1792 }
1793 }
1794 }
1795
1796 /*
1797 * Update the account.
1798 */
1799 switch (enmAccount)
1800 {
1801 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages += iPage; break;
1802 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages += iPage; break;
1803 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages += iPage; break;
1804 default:
1805 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
1806 }
1807 pGVM->gmm.s.cPrivatePages += iPage;
1808 pGMM->cAllocatedPages += iPage;
1809
1810 AssertMsgReturn(iPage == cPages, ("%u != %u\n", iPage, cPages), VERR_INTERNAL_ERROR);
1811
1812 /*
1813 * Check if we've reached some threshold and should kick one or two VMs and tell
1814 * them to inflate their balloons a bit more... later.
1815 */
1816
1817 return VINF_SUCCESS;
1818}
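
/** @par Editorial usage sketch (not part of the original source)
 * The VERR_GMM_SEED_ME contract seen from the caller's side: when the free
 * sets run dry in legacy allocation mode, the allocation has to be retried
 * after a chunk-sized block has been supplied via GMMR0SeedChunk. Only the
 * function names, parameters and status codes below come from this file;
 * pvR3Seed and the retry loop are hypothetical.
 * @code
 * int rc = GMMR0AllocatePages(pVM, cPages, paPages, GMMACCOUNT_BASE);
 * while (rc == VERR_GMM_SEED_ME)
 * {
 *     // a fresh, page aligned, GMM_CHUNK_SIZE byte ring-3 block is needed
 *     // for every round; how it is allocated is outside the scope of GMM.
 *     rc = GMMR0SeedChunk(pVM, pvR3Seed);
 *     if (RT_SUCCESS(rc))
 *         rc = GMMR0AllocatePages(pVM, cPages, paPages, GMMACCOUNT_BASE);
 * }
 * @endcode
 */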
1819
1820
1821/**
1822 * Updates the previous allocations and allocates more pages.
1823 *
1824 * The handy pages are always taken from the 'base' memory account.
1825 * The allocated pages are not cleared and will contain random garbage.
1826 *
1827 * @returns VBox status code:
1828 * @retval VINF_SUCCESS on success.
1829 * @retval VERR_NOT_OWNER if the caller is not an EMT.
1830 * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
1831 * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
1832 * private page.
1833 * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
1834 * shared page.
1835 * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
1836 * owned by the VM.
1837 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
1838 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
1839 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
1840 * that is we're trying to allocate more than we've reserved.
1841 *
1842 * @param pVM Pointer to the shared VM structure.
1843 * @param cPagesToUpdate The number of pages to update (starting from the head).
1844 * @param cPagesToAlloc The number of pages to allocate (starting from the head).
1845 * @param paPages The array of page descriptors.
1846 * See GMMPAGEDESC for details on what is expected on input.
1847 * @thread EMT.
1848 */
1849GMMR0DECL(int) GMMR0AllocateHandyPages(PVM pVM, uint32_t cPagesToUpdate, uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
1850{
1851 LogFlow(("GMMR0AllocateHandyPages: pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
1852 pVM, cPagesToUpdate, cPagesToAlloc, paPages));
1853
1854 /*
1855 * Validate, get basics and take the semaphore.
1856 * (This is a relatively busy path, so make predictions where possible.)
1857 */
1858 PGMM pGMM;
1859 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
1860 PGVM pGVM = GVMMR0ByVM(pVM);
1861 if (RT_UNLIKELY(!pGVM))
1862 return VERR_INVALID_PARAMETER;
1863 if (RT_UNLIKELY(pGVM->hEMT != RTThreadNativeSelf()))
1864 return VERR_NOT_OWNER;
1865
1866 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
1867 AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
1868 || (cPagesToAlloc && cPagesToAlloc < 1024),
1869 ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
1870 VERR_INVALID_PARAMETER);
1871
1872 unsigned iPage = 0;
1873 for (; iPage < cPagesToUpdate; iPage++)
1874 {
1875 AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
1876 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
1877 || paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
1878 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
1879 ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
1880 VERR_INVALID_PARAMETER);
1881 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
1882 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
1883 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
1884 AssertMsgReturn( paPages[iPage].idSharedPage <= GMM_PAGEID_LAST
1885 /*|| paPages[iPage].idSharedPage == NIL_GMM_PAGEID*/,
1886 ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
1887 }
1888
1889 for (; iPage < cPagesToAlloc; iPage++)
1890 {
1891 AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
1892 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
1893 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
1894 }
1895
1896 int rc = RTSemFastMutexRequest(pGMM->Mtx);
1897 AssertRC(rc);
1898
1899 /* No allocations before the initial reservation has been made! */
1900 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
1901 && pGVM->gmm.s.Reserved.cFixedPages
1902 && pGVM->gmm.s.Reserved.cShadowPages))
1903 {
1904 /*
1905 * Perform the updates.
1906 * Stop on the first error.
1907 */
1908 for (iPage = 0; iPage < cPagesToUpdate; iPage++)
1909 {
1910 if (paPages[iPage].idPage != NIL_GMM_PAGEID)
1911 {
1912 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
1913 if (RT_LIKELY(pPage))
1914 {
1915 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
1916 {
1917 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
1918 {
1919 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
1920 if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
1921 pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
1922 else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
1923 pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
1924 /* else: NIL_RTHCPHYS nothing */
1925
1926 paPages[iPage].idPage = NIL_GMM_PAGEID;
1927 paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
1928 }
1929 else
1930 {
1931 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
1932 iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
1933 rc = VERR_GMM_NOT_PAGE_OWNER;
1934 break;
1935 }
1936 }
1937 else
1938 {
1939 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage));
1940 rc = VERR_GMM_PAGE_NOT_PRIVATE;
1941 break;
1942 }
1943 }
1944 else
1945 {
1946 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
1947 rc = VERR_GMM_PAGE_NOT_FOUND;
1948 break;
1949 }
1950 }
1951
1952 if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
1953 {
1954 PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
1955 if (RT_LIKELY(pPage))
1956 {
1957 if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
1958 {
1959 AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
1960 Assert(pPage->Shared.cRefs);
1961 Assert(pGVM->gmm.s.cSharedPages);
1962 Assert(pGVM->gmm.s.Allocated.cBasePages);
1963
1964 pGVM->gmm.s.cSharedPages--;
1965 pGVM->gmm.s.Allocated.cBasePages--;
1966 if (!--pPage->Shared.cRefs)
1967 gmmR0FreeSharedPage(pGMM, paPages[iPage].idSharedPage, pPage);
1968
1969 paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
1970 }
1971 else
1972 {
1973 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
1974 rc = VERR_GMM_PAGE_NOT_SHARED;
1975 break;
1976 }
1977 }
1978 else
1979 {
1980 Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
1981 rc = VERR_GMM_PAGE_NOT_FOUND;
1982 break;
1983 }
1984 }
1985 }
1986
1987 /*
1988 * Join paths with GMMR0AllocatePages for the allocation.
1989 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
1990 */
1991 while (RT_SUCCESS(rc))
1992 {
1993 rc = gmmR0AllocatePages(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
1994 if ( rc != VERR_GMM_SEED_ME
1995 || pGMM->fLegacyAllocationMode)
1996 break;
1997 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPagesToAlloc);
1998 }
1999 }
2000 else
2001 rc = VERR_WRONG_ORDER;
2002
2003 RTSemFastMutexRelease(pGMM->Mtx);
2004 LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
2005 return rc;
2006}
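
/** @par Editorial usage sketch (not part of the original source)
 * Minimal example of an EMT requesting a batch of handy pages without any
 * updates; the array size is an arbitrary illustration and the NIL
 * initialisation follows the input validation above.
 * @code
 * GMMPAGEDESC    aPageDescs[32];
 * uint32_t const cToAlloc = RT_ELEMENTS(aPageDescs);
 * for (uint32_t i = 0; i < cToAlloc; i++)
 * {
 *     aPageDescs[i].HCPhysGCPhys = NIL_RTHCPHYS;
 *     aPageDescs[i].idPage       = NIL_GMM_PAGEID;
 *     aPageDescs[i].idSharedPage = NIL_GMM_PAGEID;
 * }
 * int rc = GMMR0AllocateHandyPages(pVM, 0, cToAlloc, &aPageDescs[0]); // no updates, only allocations
 * if (RT_SUCCESS(rc))
 * {
 *     // each descriptor now holds the new idPage and the host physical
 *     // address of the backing page in HCPhysGCPhys.
 * }
 * @endcode
 */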
2007
2008
2009/**
2010 * Allocate one or more pages.
2011 *
2012 * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
2013 * The allocated pages are not cleared and will contain random garbage.
2014 *
2015 * @returns VBox status code:
2016 * @retval VINF_SUCCESS on success.
2017 * @retval VERR_NOT_OWNER if the caller is not an EMT.
2018 * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
2019 * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
2020 * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
2021 * that is we're trying to allocate more than we've reserved.
2022 *
2023 * @param pVM Pointer to the shared VM structure.
2024 * @param cPages The number of pages to allocate.
2025 * @param paPages Pointer to the page descriptors.
2026 * See GMMPAGEDESC for details on what is expected on input.
2027 * @param enmAccount The account to charge.
2028 *
2029 * @thread EMT.
2030 */
2031GMMR0DECL(int) GMMR0AllocatePages(PVM pVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
2032{
2033 LogFlow(("GMMR0AllocatePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2034
2035 /*
2036 * Validate, get basics and take the semaphore.
2037 */
2038 PGMM pGMM;
2039 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2040 PGVM pGVM = GVMMR0ByVM(pVM);
2041 if (!pGVM)
2042 return VERR_INVALID_PARAMETER;
2043 if (pGVM->hEMT != RTThreadNativeSelf())
2044 return VERR_NOT_OWNER;
2045
2046 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2047 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2048 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2049
2050 for (unsigned iPage = 0; iPage < cPages; iPage++)
2051 {
2052 AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
2053 || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
2054 || ( enmAccount == GMMACCOUNT_BASE
2055 && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
2056 && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
2057 ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
2058 VERR_INVALID_PARAMETER);
2059 AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2060 AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
2061 }
2062
2063 int rc = RTSemFastMutexRequest(pGMM->Mtx);
2064 AssertRC(rc);
2065
2066 /* No allocations before the initial reservation has been made! */
2067 if (RT_LIKELY( pGVM->gmm.s.Reserved.cBasePages
2068 && pGVM->gmm.s.Reserved.cFixedPages
2069 && pGVM->gmm.s.Reserved.cShadowPages))
2070 {
2071 /*
2072 * gmmR0AllocatePages seed loop.
2073 * Note! gmmR0AllocateMoreChunks may leave the protection of the mutex!
2074 */
2075 while (RT_SUCCESS(rc))
2076 {
2077 rc = gmmR0AllocatePages(pGMM, pGVM, cPages, paPages, enmAccount);
2078 if ( rc != VERR_GMM_SEED_ME
2079 || pGMM->fLegacyAllocationMode)
2080 break;
2081 rc = gmmR0AllocateMoreChunks(pGMM, pGVM, &pGMM->Private, cPages);
2082 }
2083 }
2084 else
2085 rc = VERR_WRONG_ORDER;
2086
2087 RTSemFastMutexRelease(pGMM->Mtx);
2088 LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
2089 return rc;
2090}
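
/** @par Editorial usage sketch (not part of the original source)
 * Allocating two base-account pages during VM construction; the guest
 * physical address is a hypothetical, page aligned example value.
 * @code
 * GMMPAGEDESC aPages[2];
 * aPages[0].HCPhysGCPhys = GMM_GCPHYS_UNSHAREABLE;        // e.g. an MMIO2 page that must never be shared
 * aPages[0].idPage       = NIL_GMM_PAGEID;
 * aPages[0].idSharedPage = NIL_GMM_PAGEID;
 * aPages[1].HCPhysGCPhys = UINT64_C(0x00000000000a0000);  // hypothetical, page aligned guest physical address
 * aPages[1].idPage       = NIL_GMM_PAGEID;
 * aPages[1].idSharedPage = NIL_GMM_PAGEID;
 * int rc = GMMR0AllocatePages(pVM, RT_ELEMENTS(aPages), &aPages[0], GMMACCOUNT_BASE);
 * @endcode
 */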
2091
2092
2093/**
2094 * VMMR0 request wrapper for GMMR0AllocatePages.
2095 *
2096 * @returns see GMMR0AllocatePages.
2097 * @param pVM Pointer to the shared VM structure.
2098 * @param pReq The request packet.
2099 */
2100GMMR0DECL(int) GMMR0AllocatePagesReq(PVM pVM, PGMMALLOCATEPAGESREQ pReq)
2101{
2102 /*
2103 * Validate input and pass it on.
2104 */
2105 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2106 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2107 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
2108 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
2109 VERR_INVALID_PARAMETER);
2110 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
2111 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
2112 VERR_INVALID_PARAMETER);
2113
2114 return GMMR0AllocatePages(pVM, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2115}
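
/** @par Editorial usage sketch (not part of the original source)
 * Sizing and filling the variable length request packet; the allocation and
 * the way the header ends up being completed are illustrative only, while the
 * size calculation mirrors the validation above.
 * @code
 * uint32_t const       cPages = 4;
 * uint32_t const       cbReq  = RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[cPages]);
 * PGMMALLOCATEPAGESREQ pReq   = (PGMMALLOCATEPAGESREQ)RTMemAllocZ(cbReq); // NULL check omitted for brevity
 * pReq->Hdr.cbReq  = cbReq;             // the rest of the header is set up by the regular request path
 * pReq->cPages     = cPages;
 * pReq->enmAccount = GMMACCOUNT_BASE;
 * for (uint32_t i = 0; i < cPages; i++)
 * {
 *     pReq->aPages[i].HCPhysGCPhys = NIL_RTHCPHYS;
 *     pReq->aPages[i].idPage       = NIL_GMM_PAGEID;
 *     pReq->aPages[i].idSharedPage = NIL_GMM_PAGEID;
 * }
 * int rc = GMMR0AllocatePagesReq(pVM, pReq);
 * @endcode
 */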
2116
2117
2118/**
2119 * Frees a chunk, giving it back to the host OS.
2120 *
2121 * @param pGMM Pointer to the GMM instance.
2122 * @param pGVM This is set when called from GMMR0CleanupVM so we can
2123 * unmap and free the chunk in one go.
2124 * @param pChunk The chunk to free.
2125 */
2126static void gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2127{
2128 Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
2129
2130 /*
2131 * Cleanup hack! Unmap the chunk from the caller's address space.
2132 */
2133 if ( pChunk->cMappings
2134 && pGVM)
2135 gmmR0UnmapChunk(pGMM, pGVM, pChunk);
2136
2137 /*
2138 * If there are current mappings of the chunk, then request the
2139 * VMs to unmap them. Reposition the chunk in the free list so
2140 * it won't be a likely candidate for allocations.
2141 */
2142 if (pChunk->cMappings)
2143 {
2144 /** @todo R0 -> VM request */
2145 }
2146 else
2147 {
2148 /*
2149 * Try to free the memory object.
2150 */
2151 int rc = RTR0MemObjFree(pChunk->MemObj, false /* fFreeMappings */);
2152 if (RT_SUCCESS(rc))
2153 {
2154 pChunk->MemObj = NIL_RTR0MEMOBJ;
2155
2156 /*
2157 * Unlink it from everywhere.
2158 */
2159 gmmR0UnlinkChunk(pChunk);
2160
2161 PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
2162 Assert(pCore == &pChunk->Core); NOREF(pCore);
2163
2164 PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
2165 if (pTlbe->pChunk == pChunk)
2166 {
2167 pTlbe->idChunk = NIL_GMM_CHUNKID;
2168 pTlbe->pChunk = NULL;
2169 }
2170
2171 Assert(pGMM->cChunks > 0);
2172 pGMM->cChunks--;
2173
2174 /*
2175 * Free the Chunk ID and struct.
2176 */
2177 gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
2178 pChunk->Core.Key = NIL_GMM_CHUNKID;
2179
2180 RTMemFree(pChunk->paMappings);
2181 pChunk->paMappings = NULL;
2182
2183 RTMemFree(pChunk);
2184 }
2185 else
2186 AssertRC(rc);
2187 }
2188}
2189
2190
2191/**
2192 * Free page worker.
2193 *
2194 * The caller does all the statistics decrementing; we do all the incrementing.
2195 *
2196 * @param pGMM Pointer to the GMM instance data.
2197 * @param pChunk Pointer to the chunk this page belongs to.
2198 * @param idPage The Page ID.
2199 * @param pPage Pointer to the page.
2200 */
2201static void gmmR0FreePageWorker(PGMM pGMM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
2202{
2203 Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
2204 pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
2205
2206 /*
2207 * Put the page on the free list.
2208 */
2209 pPage->u = 0;
2210 pPage->Free.u2State = GMM_PAGE_STATE_FREE;
2211 Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
2212 pPage->Free.iNext = pChunk->iFreeHead;
2213 pChunk->iFreeHead = pPage - &pChunk->aPages[0];
2214
2215 /*
2216 * Update statistics (the cShared/cPrivate stats are up to date already),
2217 * and relink the chunk if necessary.
2218 */
2219 if ((pChunk->cFree & GMM_CHUNK_FREE_SET_MASK) == 0)
2220 {
2221 gmmR0UnlinkChunk(pChunk);
2222 pChunk->cFree++;
2223 gmmR0LinkChunk(pChunk, pChunk->cShared ? &pGMM->Shared : &pGMM->Private);
2224 }
2225 else
2226 {
2227 pChunk->cFree++;
2228 pChunk->pSet->cPages++;
2229
2230 /*
2231 * If the chunk becomes empty, consider giving memory back to the host OS.
2232 *
2233 * The current strategy is to try to give it back if there are other chunks
2234 * in this free list, meaning if there are at least 240 free pages in this
2235 * category. Note that since there are probably mappings of the chunk,
2236 * it won't be freed up instantly, which probably screws up this logic
2237 * a bit...
2238 */
2239 if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
2240 && pChunk->pFreeNext
2241 && pChunk->pFreePrev
2242 && !pGMM->fLegacyAllocationMode))
2243 gmmR0FreeChunk(pGMM, NULL, pChunk);
2244 }
2245}
2246
2247
2248/**
2249 * Frees a shared page, the page is known to exist and be valid and such.
2250 *
2251 * @param pGMM Pointer to the GMM instance.
2252 * @param idPage The Page ID
2253 * @param pPage The page structure.
2254 */
2255DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2256{
2257 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2258 Assert(pChunk);
2259 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2260 Assert(pChunk->cShared > 0);
2261 Assert(pGMM->cSharedPages > 0);
2262 Assert(pGMM->cAllocatedPages > 0);
2263 Assert(!pPage->Shared.cRefs);
2264
2265 pChunk->cShared--;
2266 pGMM->cAllocatedPages--;
2267 pGMM->cSharedPages--;
2268 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2269}
2270
2271
2272/**
2273 * Frees a private page, the page is known to exist and be valid and such.
2274 *
2275 * @param pGMM Pointer to the GMM instance.
2276 * @param idPage The Page ID
2277 * @param pPage The page structure.
2278 */
2279DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage)
2280{
2281 PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
2282 Assert(pChunk);
2283 Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
2284 Assert(pChunk->cPrivate > 0);
2285 Assert(pGMM->cAllocatedPages > 0);
2286
2287 pChunk->cPrivate--;
2288 pGMM->cAllocatedPages--;
2289 gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
2290}
2291
2292
2293/**
2294 * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
2295 *
2296 * @returns VBox status code:
2297 * @retval xxx
2298 *
2299 * @param pGMM Pointer to the GMM instance data.
2300 * @param pGVM Pointer to the shared VM structure.
2301 * @param cPages The number of pages to free.
2302 * @param paPages Pointer to the page descriptors.
2303 * @param enmAccount The account this relates to.
2304 */
2305static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2306{
2307 /*
2308 * Check that the request isn't impossible with respect to the account status.
2309 */
2310 switch (enmAccount)
2311 {
2312 case GMMACCOUNT_BASE:
2313 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cBasePages < cPages))
2314 {
2315 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
2316 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2317 }
2318 break;
2319 case GMMACCOUNT_SHADOW:
2320 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cShadowPages < cPages))
2321 {
2322 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cShadowPages, cPages));
2323 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2324 }
2325 break;
2326 case GMMACCOUNT_FIXED:
2327 if (RT_UNLIKELY(pGVM->gmm.s.Allocated.cFixedPages < cPages))
2328 {
2329 Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cFixedPages, cPages));
2330 return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2331 }
2332 break;
2333 default:
2334 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2335 }
2336
2337 /*
2338 * Walk the descriptors and free the pages.
2339 *
2340 * Statistics (except the account) are being updated as we go along,
2341 * unlike the alloc code. Also, stop on the first error.
2342 */
2343 int rc = VINF_SUCCESS;
2344 uint32_t iPage;
2345 for (iPage = 0; iPage < cPages; iPage++)
2346 {
2347 uint32_t idPage = paPages[iPage].idPage;
2348 PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
2349 if (RT_LIKELY(pPage))
2350 {
2351 if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
2352 {
2353 if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
2354 {
2355 Assert(pGVM->gmm.s.cPrivatePages);
2356 pGVM->gmm.s.cPrivatePages--;
2357 gmmR0FreePrivatePage(pGMM, idPage, pPage);
2358 }
2359 else
2360 {
2361 Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
2362 pPage->Private.hGVM, pGVM->hEMT));
2363 rc = VERR_GMM_NOT_PAGE_OWNER;
2364 break;
2365 }
2366 }
2367 else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
2368 {
2369 Assert(pGVM->gmm.s.cSharedPages);
2370 pGVM->gmm.s.cSharedPages--;
2371 Assert(pPage->Shared.cRefs);
2372 if (!--pPage->Shared.cRefs)
2373 gmmR0FreeSharedPage(pGMM, idPage, pPage);
2374 }
2375 else
2376 {
2377 Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
2378 rc = VERR_GMM_PAGE_ALREADY_FREE;
2379 break;
2380 }
2381 }
2382 else
2383 {
2384 Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
2385 rc = VERR_GMM_PAGE_NOT_FOUND;
2386 break;
2387 }
2388 paPages[iPage].idPage = NIL_GMM_PAGEID;
2389 }
2390
2391 /*
2392 * Update the account.
2393 */
2394 switch (enmAccount)
2395 {
2396 case GMMACCOUNT_BASE: pGVM->gmm.s.Allocated.cBasePages -= iPage; break;
2397 case GMMACCOUNT_SHADOW: pGVM->gmm.s.Allocated.cShadowPages -= iPage; break;
2398 case GMMACCOUNT_FIXED: pGVM->gmm.s.Allocated.cFixedPages -= iPage; break;
2399 default:
2400 AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_INTERNAL_ERROR);
2401 }
2402
2403 /*
2404 * Any threshold stuff to be done here?
2405 */
2406
2407 return rc;
2408}
2409
2410
2411/**
2412 * Free one or more pages.
2413 *
2414 * This is typically used at reset time or power off.
2415 *
2416 * @returns VBox status code:
2417 * @retval xxx
2418 *
2419 * @param pVM Pointer to the shared VM structure.
2420 * @param cPages The number of pages to free.
2421 * @param paPages Pointer to the page descriptors containing the Page IDs for each page.
2422 * @param enmAccount The account this relates to.
2423 * @thread EMT.
2424 */
2425GMMR0DECL(int) GMMR0FreePages(PVM pVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
2426{
2427 LogFlow(("GMMR0FreePages: pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pVM, cPages, paPages, enmAccount));
2428
2429 /*
2430 * Validate input and get the basics.
2431 */
2432 PGMM pGMM;
2433 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2434 PGVM pGVM = GVMMR0ByVM(pVM);
2435 if (!pGVM)
2436 return VERR_INVALID_PARAMETER;
2437 if (pGVM->hEMT != RTThreadNativeSelf())
2438 return VERR_NOT_OWNER;
2439
2440 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2441 AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
2442 AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2443
2444 for (unsigned iPage = 0; iPage < cPages; iPage++)
2445 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2446 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2447 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2448
2449 /*
2450 * Take the semaphore and call the worker function.
2451 */
2452 int rc = RTSemFastMutexRequest(pGMM->Mtx);
2453 AssertRC(rc);
2454
2455 rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
2456
2457 RTSemFastMutexRelease(pGMM->Mtx);
2458 LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
2459 return rc;
2460}
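
/** @par Editorial usage sketch (not part of the original source)
 * Freeing two previously allocated pages by their page IDs; idFirstPage and
 * idSecondPage are hypothetical IDs returned by an earlier allocation.
 * @code
 * GMMFREEPAGEDESC aPages[2];
 * aPages[0].idPage = idFirstPage;
 * aPages[1].idPage = idSecondPage;
 * int rc = GMMR0FreePages(pVM, RT_ELEMENTS(aPages), &aPages[0], GMMACCOUNT_BASE);
 * @endcode
 */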
2461
2462
2463/**
2464 * VMMR0 request wrapper for GMMR0FreePages.
2465 *
2466 * @returns see GMMR0FreePages.
2467 * @param pVM Pointer to the shared VM structure.
2468 * @param pReq The request packet.
2469 */
2470GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, PGMMFREEPAGESREQ pReq)
2471{
2472 /*
2473 * Validate input and pass it on.
2474 */
2475 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2476 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2477 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
2478 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
2479 VERR_INVALID_PARAMETER);
2480 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages]),
2481 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[pReq->cPages])),
2482 VERR_INVALID_PARAMETER);
2483
2484 return GMMR0FreePages(pVM, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
2485}
2486
2487
2488/**
2489 * Report back on a memory ballooning request.
2490 *
2491 * The request may or may not have been initiated by the GMM. If it was initiated
2492 * by the GMM, it is important that this function is called even if no pages were
2493 * ballooned.
2494 *
2495 * Since the whole purpose of ballooning is to free up guest RAM pages, this API
2496 * may also be given a set of related pages to be freed. These pages are assumed
2497 * to be on the base account.
2498 *
2499 * @returns VBox status code:
2500 * @retval xxx
2501 *
2502 * @param pVM Pointer to the shared VM structure.
2503 * @param cBalloonedPages The number of pages that was ballooned.
2504 * @param cPagesToFree The number of pages to be freed.
2505 * @param paPages Pointer to the page descriptors for the pages that are to be freed.
2506 * @param fCompleted Indicates whether the ballooning request was completed (true) or
2507 * if there are more pages to come (false). If the ballooning was
2508 * not triggered by the GMM, don't set this.
2509 * @thread EMT.
2510 */
2511GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, uint32_t cBalloonedPages, uint32_t cPagesToFree, PGMMFREEPAGEDESC paPages, bool fCompleted)
2512{
2513 LogFlow(("GMMR0BalloonedPages: pVM=%p cBalloonedPages=%#x cPagestoFree=%#x paPages=%p enmAccount=%d fCompleted=%RTbool\n",
2514 pVM, cBalloonedPages, cPagesToFree, paPages, fCompleted));
2515
2516 /*
2517 * Validate input and get the basics.
2518 */
2519 PGMM pGMM;
2520 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2521 PGVM pGVM = GVMMR0ByVM(pVM);
2522 if (!pGVM)
2523 return VERR_INVALID_PARAMETER;
2524 if (pGVM->hEMT != RTThreadNativeSelf())
2525 return VERR_NOT_OWNER;
2526
2527 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2528 AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
2529 AssertMsgReturn(cPagesToFree <= cBalloonedPages, ("%#x\n", cPagesToFree), VERR_INVALID_PARAMETER);
2530
2531 for (unsigned iPage = 0; iPage < cPagesToFree; iPage++)
2532 AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
2533 /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
2534 ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
2535
2536 /*
2537 * Take the semaphore and do some more validations.
2538 */
2539 int rc = RTSemFastMutexRequest(pGMM->Mtx);
2540 AssertRC(rc);
2541 if (pGVM->gmm.s.Allocated.cBasePages >= cPagesToFree)
2542 {
2543 /*
2544 * Record the ballooned memory.
2545 */
2546 pGMM->cBalloonedPages += cBalloonedPages;
2547 if (pGVM->gmm.s.cReqBalloonedPages)
2548 {
2549 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2550 pGVM->gmm.s.cReqActuallyBalloonedPages += cBalloonedPages;
2551 if (fCompleted)
2552 {
2553 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx; / VM: Total=%#llx Req=%#llx Actual=%#llx (completed)\n", cBalloonedPages,
2554 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
2555
2556 /*
2557 * Anything we need to do here now when the request has been completed?
2558 */
2559 pGVM->gmm.s.cReqBalloonedPages = 0;
2560 }
2561 else
2562 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n", cBalloonedPages,
2563 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqBalloonedPages, pGVM->gmm.s.cReqActuallyBalloonedPages));
2564 }
2565 else
2566 {
2567 pGVM->gmm.s.cBalloonedPages += cBalloonedPages;
2568 Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
2569 cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2570 }
2571
2572 /*
2573 * Any pages to free?
2574 */
2575 if (cPagesToFree)
2576 rc = gmmR0FreePages(pGMM, pGVM, cPagesToFree, paPages, GMMACCOUNT_BASE);
2577 }
2578 else
2579 {
2580 rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
2581 }
2582
2583 RTSemFastMutexRelease(pGMM->Mtx);
2584 LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
2585 return rc;
2586}
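
/** @par Editorial usage sketch (not part of the original source)
 * Reporting 256 newly ballooned pages and handing back two of them right
 * away; the page counts and IDs are hypothetical, and fCompleted is false
 * because the request is assumed not to have been triggered by the GMM.
 * @code
 * GMMFREEPAGEDESC aPages[2];
 * aPages[0].idPage = idBalloonedPage1;
 * aPages[1].idPage = idBalloonedPage2;
 * int rc = GMMR0BalloonedPages(pVM, 256, RT_ELEMENTS(aPages), &aPages[0], false);
 * @endcode
 */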
2587
2588
2589/**
2590 * VMMR0 request wrapper for GMMR0BalloonedPages.
2591 *
2592 * @returns see GMMR0BalloonedPages.
2593 * @param pVM Pointer to the shared VM structure.
2594 * @param pReq The request packet.
2595 */
2596GMMR0DECL(int) GMMR0BalloonedPagesReq(PVM pVM, PGMMBALLOONEDPAGESREQ pReq)
2597{
2598 /*
2599 * Validate input and pass it on.
2600 */
2601 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2602 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2603 AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMBALLOONEDPAGESREQ, aPages[0]),
2604 ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMBALLOONEDPAGESREQ, aPages[0])),
2605 VERR_INVALID_PARAMETER);
2606 AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF(GMMBALLOONEDPAGESREQ, aPages[pReq->cPagesToFree]),
2607 ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMBALLOONEDPAGESREQ, aPages[pReq->cPagesToFree])),
2608 VERR_INVALID_PARAMETER);
2609
2610 return GMMR0BalloonedPages(pVM, pReq->cBalloonedPages, pReq->cPagesToFree, &pReq->aPages[0], pReq->fCompleted);
2611}
2612
2613
2614/**
2615 * Report balloon deflating.
2616 *
2617 * @returns VBox status code:
2618 * @retval xxx
2619 *
2620 * @param pVM Pointer to the shared VM structure.
2621 * @param cPages The number of pages that were let out of the balloon.
2622 * @thread EMT.
2623 */
2624GMMR0DECL(int) GMMR0DeflatedBalloon(PVM pVM, uint32_t cPages)
2625{
2626 LogFlow(("GMMR0DeflatedBalloon: pVM=%p cPages=%#x\n", pVM, cPages));
2627
2628 /*
2629 * Validate input and get the basics.
2630 */
2631 PGMM pGMM;
2632 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2633 PGVM pGVM = GVMMR0ByVM(pVM);
2634 if (!pGVM)
2635 return VERR_INVALID_PARAMETER;
2636 if (pGVM->hEMT != RTThreadNativeSelf())
2637 return VERR_NOT_OWNER;
2638
2639 AssertMsgReturn(cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
2640
2641 /*
2642 * Take the semaphore and do some more validations.
2643 */
2644 int rc = RTSemFastMutexRequest(pGMM->Mtx);
2645 AssertRC(rc);
2646
2647 if (pGVM->gmm.s.cBalloonedPages >= cPages)
2648 {
2649 Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
2650
2651 /*
2652 * Record it.
2653 */
2654 pGMM->cBalloonedPages -= cPages;
2655 pGVM->gmm.s.cBalloonedPages -= cPages;
2656 if (pGVM->gmm.s.cReqDeflatePages)
2657 {
2658 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n", cPages,
2659 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages, pGVM->gmm.s.cReqDeflatePages));
2660
2661 /*
2662 * Anything we need to do here now when the request has been completed?
2663 */
2664 pGVM->gmm.s.cReqDeflatePages = 0;
2665 }
2666 else
2667 Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx\n", cPages,
2668 pGMM->cBalloonedPages, pGVM->gmm.s.cBalloonedPages));
2669 }
2670 else
2671 {
2672 Log(("GMMR0DeflatedBalloon: cBalloonedPages=%#llx cPages=%#x\n", pGVM->gmm.s.cBalloonedPages, cPages));
2673 rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
2674 }
2675
2676 RTSemFastMutexRelease(pGMM->Mtx);
2677 LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
2678 return rc;
2679}
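
/** @par Editorial usage sketch (not part of the original source)
 * Reporting that the guest let pages back out of its balloon; the count is a
 * hypothetical example value.
 * @code
 * int rc = GMMR0DeflatedBalloon(pVM, 128); // 128 pages were released from the balloon
 * @endcode
 */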
2680
2681
2682/**
2683 * Unmaps a chunk previously mapped into the address space of the current process.
2684 *
2685 * @returns VBox status code.
2686 * @param pGMM Pointer to the GMM instance data.
2687 * @param pGVM Pointer to the Global VM structure.
2688 * @param pChunk Pointer to the chunk to be unmapped.
2689 */
2690static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
2691{
2692 if (!pGMM->fLegacyAllocationMode)
2693 {
2694 /*
2695 * Find the mapping and try unmapping it.
2696 */
2697 for (uint32_t i = 0; i < pChunk->cMappings; i++)
2698 {
2699 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
2700 if (pChunk->paMappings[i].pGVM == pGVM)
2701 {
2702 /* unmap */
2703 int rc = RTR0MemObjFree(pChunk->paMappings[i].MapObj, false /* fFreeMappings (NA) */);
2704 if (RT_SUCCESS(rc))
2705 {
2706 /* update the record. */
2707 pChunk->cMappings--;
2708 if (i < pChunk->cMappings)
2709 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
2710 pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
2711 pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
2712 }
2713 return rc;
2714 }
2715 }
2716 }
2717 else if (pChunk->hGVM == pGVM->hSelf)
2718 return VINF_SUCCESS;
2719
2720 Log(("gmmR0MapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
2721 return VERR_GMM_CHUNK_NOT_MAPPED;
2722}
2723
2724
2725/**
2726 * Maps a chunk into the user address space of the current process.
2727 *
2728 * @returns VBox status code.
2729 * @param pGMM Pointer to the GMM instance data.
2730 * @param pGVM Pointer to the Global VM structure.
2731 * @param pChunk Pointer to the chunk to be mapped.
2732 * @param ppvR3 Where to store the ring-3 address of the mapping.
2733 * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
2734 * contain the address of the existing mapping.
2735 */
2736static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
2737{
2738 /*
2739 * If we're in legacy mode this is simple.
2740 */
2741 if (pGMM->fLegacyAllocationMode)
2742 {
2743 if (pChunk->hGVM != pGVM->hSelf)
2744 {
2745 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
2746 return VERR_GMM_CHUNK_NOT_FOUND;
2747 }
2748
2749 *ppvR3 = RTR0MemObjAddressR3(pChunk->MemObj);
2750 return VINF_SUCCESS;
2751 }
2752
2753 /*
2754 * Check to see if the chunk is already mapped.
2755 */
2756 for (uint32_t i = 0; i < pChunk->cMappings; i++)
2757 {
2758 Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
2759 if (pChunk->paMappings[i].pGVM == pGVM)
2760 {
2761 *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappings[i].MapObj);
2762 Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
2763 return VERR_GMM_CHUNK_ALREADY_MAPPED;
2764 }
2765 }
2766
2767 /*
2768 * Do the mapping.
2769 */
2770 RTR0MEMOBJ MapObj;
2771 int rc = RTR0MemObjMapUser(&MapObj, pChunk->MemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
2772 if (RT_SUCCESS(rc))
2773 {
2774 /* reallocate the array? */
2775 if ((pChunk->cMappings & 1 /*7*/) == 0)
2776 {
2777 void *pvMappings = RTMemRealloc(pChunk->paMappings, (pChunk->cMappings + 2 /*8*/) * sizeof(pChunk->paMappings[0]));
2778 if (RT_UNLIKELY(!pvMappings))
2779 {
2780 rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
2781 AssertRC(rc);
2782 return VERR_NO_MEMORY;
2783 }
2784 pChunk->paMappings = (PGMMCHUNKMAP)pvMappings;
2785 }
2786
2787 /* insert new entry */
2788 pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
2789 pChunk->paMappings[pChunk->cMappings].pGVM = pGVM;
2790 pChunk->cMappings++;
2791
2792 *ppvR3 = RTR0MemObjAddressR3(MapObj);
2793 }
2794
2795 return rc;
2796}
2797
2798
2799/**
2800 * Map a chunk and/or unmap another chunk.
2801 *
2802 * The mapping and the unmapping apply to the current process.
2803 *
2804 * This API does two things because it saves a kernel call per mapping when
2805 * the ring-3 mapping cache is full.
2806 *
2807 * @returns VBox status code.
2808 * @param pVM The VM.
2809 * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
2810 * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
2811 * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
2812 * @thread EMT
2813 */
2814GMMR0DECL(int) GMMR0MapUnmapChunk(PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
2815{
2816 LogFlow(("GMMR0MapUnmapChunk: pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
2817 pVM, idChunkMap, idChunkUnmap, ppvR3));
2818
2819 /*
2820 * Validate input and get the basics.
2821 */
2822 PGMM pGMM;
2823 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2824 PGVM pGVM = GVMMR0ByVM(pVM);
2825 if (!pGVM)
2826 return VERR_INVALID_PARAMETER;
2827 if (pGVM->hEMT != RTThreadNativeSelf())
2828 return VERR_NOT_OWNER;
2829
2830 AssertCompile(NIL_GMM_CHUNKID == 0);
2831 AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
2832 AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
2833
2834 if ( idChunkMap == NIL_GMM_CHUNKID
2835 && idChunkUnmap == NIL_GMM_CHUNKID)
2836 return VERR_INVALID_PARAMETER;
2837
2838 if (idChunkMap != NIL_GMM_CHUNKID)
2839 {
2840 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
2841 *ppvR3 = NIL_RTR3PTR;
2842 }
2843
2844 /*
2845 * Take the semaphore and do the work.
2846 *
2847 * The unmapping is done last since it's easier to undo a mapping than
2848 * to undo an unmapping. The ring-3 mapping cache cannot be so big
2849 * that it pushes the user virtual address space to within a chunk of
2850 * its limits, so no problem here.
2851 */
2852 int rc = RTSemFastMutexRequest(pGMM->Mtx);
2853 AssertRC(rc);
2854
2855 PGMMCHUNK pMap = NULL;
2856 if (idChunkMap != NIL_GMM_CHUNKID)
2857 {
2858 pMap = gmmR0GetChunk(pGMM, idChunkMap);
2859 if (RT_LIKELY(pMap))
2860 rc = gmmR0MapChunk(pGMM, pGVM, pMap, ppvR3);
2861 else
2862 {
2863 Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
2864 rc = VERR_GMM_CHUNK_NOT_FOUND;
2865 }
2866 }
2867
2868 if ( idChunkUnmap != NIL_GMM_CHUNKID
2869 && RT_SUCCESS(rc))
2870 {
2871 PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
2872 if (RT_LIKELY(pUnmap))
2873 rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap);
2874 else
2875 {
2876 Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
2877 rc = VERR_GMM_CHUNK_NOT_FOUND;
2878 }
2879
2880 if (RT_FAILURE(rc) && pMap)
2881 gmmR0UnmapChunk(pGMM, pGVM, pMap);
2882 }
2883
2884 RTSemFastMutexRelease(pGMM->Mtx);
2885
2886 LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
2887 return rc;
2888}
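
/** @par Editorial usage sketch (not part of the original source)
 * Mapping one chunk while evicting another from the ring-3 mapping cache in a
 * single call; idChunkNew and idChunkOld are hypothetical chunk IDs.
 * @code
 * RTR3PTR pvR3 = NIL_RTR3PTR;
 * int rc = GMMR0MapUnmapChunk(pVM, idChunkNew, idChunkOld, &pvR3);
 * if (RT_SUCCESS(rc))
 * {
 *     // pvR3 now points at the GMM_CHUNK_SIZE mapping of idChunkNew in the
 *     // current process; idChunkOld has been unmapped.
 * }
 * // map only:   GMMR0MapUnmapChunk(pVM, idChunkNew, NIL_GMM_CHUNKID, &pvR3);
 * // unmap only: GMMR0MapUnmapChunk(pVM, NIL_GMM_CHUNKID, idChunkOld, NULL);
 * @endcode
 */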
2889
2890
2891/**
2892 * VMMR0 request wrapper for GMMR0MapUnmapChunk.
2893 *
2894 * @returns see GMMR0MapUnmapChunk.
2895 * @param pVM Pointer to the shared VM structure.
2896 * @param pReq The request packet.
2897 */
2898GMMR0DECL(int) GMMR0MapUnmapChunkReq(PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
2899{
2900 /*
2901 * Validate input and pass it on.
2902 */
2903 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2904 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2905 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2906
2907 return GMMR0MapUnmapChunk(pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
2908}
2909
2910
2911/**
2912 * Legacy mode API for supplying pages.
2913 *
2914 * The specified user address points to an allocation chunk sized block that
2915 * will be locked down and used by the GMM when the GM asks for pages.
2916 *
2917 * @returns VBox status code.
2918 * @param pVM The VM.
2919 * @param pvR3 Pointer to the chunk size memory block to lock down.
2920 */
2921GMMR0DECL(int) GMMR0SeedChunk(PVM pVM, RTR3PTR pvR3)
2922{
2923 /*
2924 * Validate input and get the basics.
2925 */
2926 PGMM pGMM;
2927 GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
2928 PGVM pGVM = GVMMR0ByVM(pVM);
2929 if (!pGVM)
2930 return VERR_INVALID_PARAMETER;
2931 if (pGVM->hEMT != RTThreadNativeSelf())
2932 return VERR_NOT_OWNER;
2933
2934 AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
2935 AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
2936
2937 if (!pGMM->fLegacyAllocationMode)
2938 {
2939 Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
2940 return VERR_NOT_SUPPORTED;
2941 }
2942
2943 /*
2944 * Lock the memory before taking the semaphore.
2945 */
2946 RTR0MEMOBJ MemObj;
2947 int rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, NIL_RTR0PROCESS);
2948 if (RT_SUCCESS(rc))
2949 {
2950 /*
2951 * Add a new chunk with our hGVM.
2952 */
2953 rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf);
2954 if (RT_FAILURE(rc))
2955 RTR0MemObjFree(MemObj, false /* fFreeMappings */);
2956 }
2957
2958 LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
2959 return rc;
2960}
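
/** @par Editorial usage sketch (not part of the original source)
 * Seeding the GMM with a chunk in legacy allocation mode; pvR3Seed is a
 * hypothetical ring-3 address of a page aligned, GMM_CHUNK_SIZE byte block
 * locked down on behalf of this VM. How the block is allocated in ring-3 and
 * how the call reaches ring-0 is outside the scope of this file.
 * @code
 * int rc = GMMR0SeedChunk(pVM, pvR3Seed);
 * if (RT_SUCCESS(rc))
 * {
 *     // the block now backs a new private chunk bound to this VM and will be
 *     // handed out by the allocation paths above.
 * }
 * @endcode
 */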
2961