VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 80274

Last change on this file since 80274 was 80274, checked in by vboxsync, 5 years ago

VMM: Refactoring VMMR0/* and VMMRZ/* to use VMCC & VMMCPUCC. bugref:9217

1/* $Id: GVMMR0.cpp 80274 2019-08-14 14:34:38Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
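/*
 * Editor's illustrative note (not part of the original source): with the
 * values defined further down -- an 8-entry aHzHistory array and a
 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS of 20 ms -- the history window works out to
 * 8 * 20 ms = 160 ms, which is the "~160 ms" mentioned above. A minimal sketch
 * of how the effective timer frequency could be picked from that history
 * (variable names here are hypothetical):
 *
 *   uint32_t uHz = pCpu->Ppt.uDesiredHz;                 // latest EMT hint
 *   for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
 *       uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);      // max over ~160 ms
 *   uHz = RT_MAX(uHz, pCpu->Ppt.uMinHz);                 // never below the floor
 */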
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/vmm/vmcpuset.h>
58#include <VBox/vmm/vmm.h>
59#ifdef VBOX_WITH_NEM_R0
60# include <VBox/vmm/nem.h>
61#endif
62#include <VBox/param.h>
63#include <VBox/err.h>
64
65#include <iprt/asm.h>
66#include <iprt/asm-amd64-x86.h>
67#include <iprt/critsect.h>
68#include <iprt/mem.h>
69#include <iprt/semaphore.h>
70#include <iprt/time.h>
71#include <VBox/log.h>
72#include <iprt/thread.h>
73#include <iprt/process.h>
74#include <iprt/param.h>
75#include <iprt/string.h>
76#include <iprt/assert.h>
77#include <iprt/mem.h>
78#include <iprt/memobj.h>
79#include <iprt/mp.h>
80#include <iprt/cpuset.h>
81#include <iprt/spinlock.h>
82#include <iprt/timer.h>
83
84#include "dtrace/VBoxVMM.h"
85
86
87/*********************************************************************************************************************************
88* Defined Constants And Macros *
89*********************************************************************************************************************************/
90#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
91/** Define this to enable the periodic preemption timer. */
92# define GVMM_SCHED_WITH_PPT
93#endif
94
95
96/** @def GVMM_CHECK_SMAP_SETUP
97 * SMAP check setup. */
98/** @def GVMM_CHECK_SMAP_CHECK
99 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
100 * it will be logged and @a a_BadExpr is executed. */
101/** @def GVMM_CHECK_SMAP_CHECK2
102 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
103 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
104 * executed. */
105#if defined(VBOX_STRICT) || 1
106# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
107# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
108 do { \
109 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
110 { \
111 RTCCUINTREG fEflCheck = ASMGetFlags(); \
112 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
113 { /* likely */ } \
114 else \
115 { \
116 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
117 a_BadExpr; \
118 } \
119 } \
120 } while (0)
121# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
122 do { \
123 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
124 { \
125 RTCCUINTREG fEflCheck = ASMGetFlags(); \
126 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
127 { /* likely */ } \
128 else \
129 { \
130 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
131 a_BadExpr; \
132 } \
133 } \
134 } while (0)
135#else
136# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
137# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
138# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
139#endif
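/*
 * Editor's usage sketch for the SMAP check macros above (illustrative only,
 * not taken from this file): GVMM_CHECK_SMAP_SETUP() declares the
 * fKernelFeatures variable that the check macros reference, so it must appear
 * once at the top of the function body.
 *
 *   static int gvmmR0ExampleWorker(PGVM pGVM)       // hypothetical function
 *   {
 *       GVMM_CHECK_SMAP_SETUP();
 *       GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);   // log on failure, keep going
 *       int rc = doSomething(pGVM);                 // hypothetical call
 *       GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);   // re-check after callees
 *       return rc;
 *   }
 */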
140
141
142
143/*********************************************************************************************************************************
144* Structures and Typedefs *
145*********************************************************************************************************************************/
146
147/**
148 * Global VM handle.
149 */
150typedef struct GVMHANDLE
151{
152 /** The index of the next handle in the list (free or used). (0 is nil.) */
153 uint16_t volatile iNext;
154 /** Our own index / handle value. */
155 uint16_t iSelf;
156 /** The process ID of the handle owner.
157 * This is used for access checks. */
158 RTPROCESS ProcId;
159 /** The pointer to the ring-0 only (aka global) VM structure. */
160 PGVM pGVM;
161 /** The ring-0 mapping of the shared VM instance data. */
162 PVMCC pVM;
163 /** The virtual machine object. */
164 void *pvObj;
165 /** The session this VM is associated with. */
166 PSUPDRVSESSION pSession;
167 /** The ring-0 handle of the EMT0 thread.
168 * This is used for ownership checks as well as looking up a VM handle by thread
169 * at times like assertions. */
170 RTNATIVETHREAD hEMT0;
171} GVMHANDLE;
172/** Pointer to a global VM handle. */
173typedef GVMHANDLE *PGVMHANDLE;
174
175/** Number of GVM handles (including the NIL handle). */
176#if HC_ARCH_BITS == 64
177# define GVMM_MAX_HANDLES 8192
178#else
179# define GVMM_MAX_HANDLES 128
180#endif
181
182/**
183 * Per host CPU GVMM data.
184 */
185typedef struct GVMMHOSTCPU
186{
187 /** Magic number (GVMMHOSTCPU_MAGIC). */
188 uint32_t volatile u32Magic;
189 /** The CPU ID. */
190 RTCPUID idCpu;
191 /** The CPU set index. */
192 uint32_t idxCpuSet;
193
194#ifdef GVMM_SCHED_WITH_PPT
195 /** Periodic preemption timer data. */
196 struct
197 {
198 /** The handle to the periodic preemption timer. */
199 PRTTIMER pTimer;
200 /** Spinlock protecting the data below. */
201 RTSPINLOCK hSpinlock;
203 * The smallest Hz that we need to care about. (static) */
203 uint32_t uMinHz;
204 /** The number of ticks between each historization. */
205 uint32_t cTicksHistoriziationInterval;
206 /** The current historization tick (counting up to
207 * cTicksHistoriziationInterval and then resetting). */
208 uint32_t iTickHistorization;
209 /** The current timer interval. This is set to 0 when inactive. */
210 uint32_t cNsInterval;
211 /** The current timer frequency. This is set to 0 when inactive. */
212 uint32_t uTimerHz;
213 /** The current max frequency reported by the EMTs.
214 * This gets historicized and reset by the timer callback. This is
215 * read without holding the spinlock, so needs atomic updating. */
216 uint32_t volatile uDesiredHz;
217 /** Whether the timer was started or not. */
218 bool volatile fStarted;
219 /** Set if we're starting timer. */
220 bool volatile fStarting;
221 /** The index of the next history entry (mod it). */
222 uint32_t iHzHistory;
223 /** Historicized uDesiredHz values. The array wraps around, new entries
224 * are added at iHzHistory. This is updated approximately every
225 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
226 uint32_t aHzHistory[8];
227 /** Statistics counter for recording the number of interval changes. */
228 uint32_t cChanges;
229 /** Statistics counter for recording the number of timer starts. */
230 uint32_t cStarts;
231 } Ppt;
232#endif /* GVMM_SCHED_WITH_PPT */
233
234} GVMMHOSTCPU;
235/** Pointer to the per host CPU GVMM data. */
236typedef GVMMHOSTCPU *PGVMMHOSTCPU;
237/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
238#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
239/** The interval one history entry should cover (approximately), given in
240 * nanoseconds. */
241#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
242
243
244/**
245 * The GVMM instance data.
246 */
247typedef struct GVMM
248{
249 /** Eyecatcher / magic. */
250 uint32_t u32Magic;
251 /** The index of the head of the free handle chain. (0 is nil.) */
252 uint16_t volatile iFreeHead;
253 /** The index of the head of the active handle chain. (0 is nil.) */
254 uint16_t volatile iUsedHead;
255 /** The number of VMs. */
256 uint16_t volatile cVMs;
257 /** Alignment padding. */
258 uint16_t u16Reserved;
259 /** The number of EMTs. */
260 uint32_t volatile cEMTs;
261 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
262 uint32_t volatile cHaltedEMTs;
263 /** Mini lock for restricting early wake-ups to one thread. */
264 bool volatile fDoingEarlyWakeUps;
265 bool afPadding[3]; /**< explicit alignment padding. */
266 /** When the next halted or sleeping EMT will wake up.
267 * This is set to 0 when it needs recalculating and to UINT64_MAX when
268 * there are no halted or sleeping EMTs in the GVMM. */
269 uint64_t uNsNextEmtWakeup;
270 /** The lock used to serialize VM creation, destruction and associated events that
271 * aren't performance critical. Owners may acquire the list lock. */
272 RTCRITSECT CreateDestroyLock;
273 /** The lock used to serialize used list updates and accesses.
274 * This indirectly includes scheduling since the scheduler will have to walk the
275 * used list to examine running VMs. Owners may not acquire any other locks. */
276 RTCRITSECTRW UsedLock;
277 /** The handle array.
278 * The size of this array defines the maximum number of currently running VMs.
279 * The first entry is unused as it represents the NIL handle. */
280 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
281
282 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
283 * The number of EMTs that means we no longer consider ourselves alone on a
284 * CPU/Core.
285 */
286 uint32_t cEMTsMeansCompany;
287 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
288 * The minimum sleep time for when we're alone, in nanoseconds.
289 */
290 uint32_t nsMinSleepAlone;
291 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
292 * The minimum sleep time for when we've got company, in nanoseconds.
293 */
294 uint32_t nsMinSleepCompany;
295 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
296 * The limit for the first round of early wake-ups, given in nanoseconds.
297 */
298 uint32_t nsEarlyWakeUp1;
299 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
300 * The limit for the second round of early wake-ups, given in nanoseconds.
301 */
302 uint32_t nsEarlyWakeUp2;
303
304 /** Set if we're doing early wake-ups.
305 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
306 bool volatile fDoEarlyWakeUps;
307
308 /** The number of entries in the host CPU array (aHostCpus). */
309 uint32_t cHostCpus;
310 /** Per host CPU data (variable length). */
311 GVMMHOSTCPU aHostCpus[1];
312} GVMM;
313AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
314AssertCompileMemberAlignment(GVMM, UsedLock, 8);
315AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
316/** Pointer to the GVMM instance data. */
317typedef GVMM *PGVMM;
318
319/** The GVMM::u32Magic value (Charlie Haden). */
320#define GVMM_MAGIC UINT32_C(0x19370806)
321
322
323
324/*********************************************************************************************************************************
325* Global Variables *
326*********************************************************************************************************************************/
327/** Pointer to the GVMM instance data.
328 * (Just my general dislike for global variables.) */
329static PGVMM g_pGVMM = NULL;
330
331/** Macro for obtaining and validating the g_pGVMM pointer.
332 * On failure it will return from the invoking function with the specified return value.
333 *
334 * @param pGVMM The name of the pGVMM variable.
335 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
336 * status codes.
337 */
338#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
339 do { \
340 (pGVMM) = g_pGVMM;\
341 AssertPtrReturn((pGVMM), (rc)); \
342 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
343 } while (0)
344
345/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
346 * On failure it will return from the invoking function.
347 *
348 * @param pGVMM The name of the pGVMM variable.
349 */
350#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
351 do { \
352 (pGVMM) = g_pGVMM;\
353 AssertPtrReturnVoid((pGVMM)); \
354 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
355 } while (0)
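/*
 * Editor's usage sketch for the two macros above (illustrative; this mirrors
 * how they are used by the functions further down in this file):
 *
 *   PGVMM pGVMM;
 *   GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);  // returns on failure
 *   // ... pGVMM is now validated and safe to dereference ...
 */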
356
357
358/*********************************************************************************************************************************
359* Internal Functions *
360*********************************************************************************************************************************/
361#ifdef VBOX_BUGREF_9217
362static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
363#else
364static void gvmmR0InitPerVMData(PGVM pGVM);
365#endif
366static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
367static int gvmmR0ByGVMandVM(PGVM pGVM, PVMCC pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
368static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, PGVMM *ppGVMM);
369
370#ifdef GVMM_SCHED_WITH_PPT
371static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
372#endif
373
374
375/**
376 * Initializes the GVMM.
377 *
378 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
379 *
380 * @returns VBox status code.
381 */
382GVMMR0DECL(int) GVMMR0Init(void)
383{
384 LogFlow(("GVMMR0Init:\n"));
385
386 /*
387 * Allocate and initialize the instance data.
388 */
389 uint32_t cHostCpus = RTMpGetArraySize();
390 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
391
392 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
393 if (!pGVMM)
394 return VERR_NO_MEMORY;
395 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
396 "GVMM-CreateDestroyLock");
397 if (RT_SUCCESS(rc))
398 {
399 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
400 if (RT_SUCCESS(rc))
401 {
402 pGVMM->u32Magic = GVMM_MAGIC;
403 pGVMM->iUsedHead = 0;
404 pGVMM->iFreeHead = 1;
405
406 /* the nil handle */
407 pGVMM->aHandles[0].iSelf = 0;
408 pGVMM->aHandles[0].iNext = 0;
409
410 /* the tail */
411 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
412 pGVMM->aHandles[i].iSelf = i;
413 pGVMM->aHandles[i].iNext = 0; /* nil */
414
415 /* the rest */
416 while (i-- > 1)
417 {
418 pGVMM->aHandles[i].iSelf = i;
419 pGVMM->aHandles[i].iNext = i + 1;
420 }
421
422 /* The default configuration values. */
423 uint32_t cNsResolution = RTSemEventMultiGetResolution();
424 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
425 if (cNsResolution >= 5*RT_NS_100US)
426 {
427 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
428 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
429 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
430 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
431 }
432 else if (cNsResolution > RT_NS_100US)
433 {
434 pGVMM->nsMinSleepAlone = cNsResolution / 2;
435 pGVMM->nsMinSleepCompany = cNsResolution / 4;
436 pGVMM->nsEarlyWakeUp1 = 0;
437 pGVMM->nsEarlyWakeUp2 = 0;
438 }
439 else
440 {
441 pGVMM->nsMinSleepAlone = 2000;
442 pGVMM->nsMinSleepCompany = 2000;
443 pGVMM->nsEarlyWakeUp1 = 0;
444 pGVMM->nsEarlyWakeUp2 = 0;
445 }
446 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
447
448 /* The host CPU data. */
449 pGVMM->cHostCpus = cHostCpus;
450 uint32_t iCpu = cHostCpus;
451 RTCPUSET PossibleSet;
452 RTMpGetSet(&PossibleSet);
453 while (iCpu-- > 0)
454 {
455 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
456#ifdef GVMM_SCHED_WITH_PPT
457 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
458 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
459 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
460 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
461 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
464 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
466 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
467 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
468 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
469#endif
470
471 if (RTCpuSetIsMember(&PossibleSet, iCpu))
472 {
473 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
474 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
475
476#ifdef GVMM_SCHED_WITH_PPT
477 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
478 50*1000*1000 /* whatever */,
479 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
480 gvmmR0SchedPeriodicPreemptionTimerCallback,
481 &pGVMM->aHostCpus[iCpu]);
482 if (RT_SUCCESS(rc))
483 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
484 if (RT_FAILURE(rc))
485 {
486 while (iCpu < cHostCpus)
487 {
488 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
489 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
490 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
491 iCpu++;
492 }
493 break;
494 }
495#endif
496 }
497 else
498 {
499 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
500 pGVMM->aHostCpus[iCpu].u32Magic = 0;
501 }
502 }
503 if (RT_SUCCESS(rc))
504 {
505 g_pGVMM = pGVMM;
506 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
507 return VINF_SUCCESS;
508 }
509
510 /* bail out. */
511 RTCritSectRwDelete(&pGVMM->UsedLock);
512 }
513 RTCritSectDelete(&pGVMM->CreateDestroyLock);
514 }
515
516 RTMemFree(pGVMM);
517 return rc;
518}
519
520
521/**
522 * Terminates the GVMM.
523 *
524 * This is called while owning the loader semaphore (see supdrvLdrFree()).
525 * And unless something is wrong, there should be absolutely no VMs
526 * registered at this point.
527 */
528GVMMR0DECL(void) GVMMR0Term(void)
529{
530 LogFlow(("GVMMR0Term:\n"));
531
532 PGVMM pGVMM = g_pGVMM;
533 g_pGVMM = NULL;
534 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
535 {
536 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
537 return;
538 }
539
540 /*
541 * First of all, stop all active timers.
542 */
543 uint32_t cActiveTimers = 0;
544 uint32_t iCpu = pGVMM->cHostCpus;
545 while (iCpu-- > 0)
546 {
547 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
548#ifdef GVMM_SCHED_WITH_PPT
549 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
550 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
551 cActiveTimers++;
552#endif
553 }
554 if (cActiveTimers)
555 RTThreadSleep(1); /* fudge */
556
557 /*
558 * Invalidate the instance data and free resources.
559 */
560 pGVMM->u32Magic = ~GVMM_MAGIC;
561 RTCritSectRwDelete(&pGVMM->UsedLock);
562 RTCritSectDelete(&pGVMM->CreateDestroyLock);
563
564 pGVMM->iFreeHead = 0;
565 if (pGVMM->iUsedHead)
566 {
567 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
568 pGVMM->iUsedHead = 0;
569 }
570
571#ifdef GVMM_SCHED_WITH_PPT
572 iCpu = pGVMM->cHostCpus;
573 while (iCpu-- > 0)
574 {
575 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
576 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
577 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
578 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
579 }
580#endif
581
582 RTMemFree(pGVMM);
583}
584
585
586/**
587 * A quick hack for setting global config values.
588 *
589 * @returns VBox status code.
590 *
591 * @param pSession The session handle. Used for authentication.
592 * @param pszName The variable name.
593 * @param u64Value The new value.
594 */
595GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
596{
597 /*
598 * Validate input.
599 */
600 PGVMM pGVMM;
601 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
602 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
603 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
604
605 /*
606 * String switch time!
607 */
608 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
609 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
610 int rc = VINF_SUCCESS;
611 pszName += sizeof("/GVMM/") - 1;
612 if (!strcmp(pszName, "cEMTsMeansCompany"))
613 {
614 if (u64Value <= UINT32_MAX)
615 pGVMM->cEMTsMeansCompany = u64Value;
616 else
617 rc = VERR_OUT_OF_RANGE;
618 }
619 else if (!strcmp(pszName, "MinSleepAlone"))
620 {
621 if (u64Value <= RT_NS_100MS)
622 pGVMM->nsMinSleepAlone = u64Value;
623 else
624 rc = VERR_OUT_OF_RANGE;
625 }
626 else if (!strcmp(pszName, "MinSleepCompany"))
627 {
628 if (u64Value <= RT_NS_100MS)
629 pGVMM->nsMinSleepCompany = u64Value;
630 else
631 rc = VERR_OUT_OF_RANGE;
632 }
633 else if (!strcmp(pszName, "EarlyWakeUp1"))
634 {
635 if (u64Value <= RT_NS_100MS)
636 {
637 pGVMM->nsEarlyWakeUp1 = u64Value;
638 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
639 }
640 else
641 rc = VERR_OUT_OF_RANGE;
642 }
643 else if (!strcmp(pszName, "EarlyWakeUp2"))
644 {
645 if (u64Value <= RT_NS_100MS)
646 {
647 pGVMM->nsEarlyWakeUp2 = u64Value;
648 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
649 }
650 else
651 rc = VERR_OUT_OF_RANGE;
652 }
653 else
654 rc = VERR_CFGM_VALUE_NOT_FOUND;
655 return rc;
656}
657
658
659/**
660 * A quick hack for getting global config values.
661 *
662 * @returns VBox status code.
663 *
664 * @param pSession The session handle. Used for authentication.
665 * @param pszName The variable name.
666 * @param pu64Value Where to return the value.
667 */
668GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
669{
670 /*
671 * Validate input.
672 */
673 PGVMM pGVMM;
674 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
675 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
676 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
677 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
678
679 /*
680 * String switch time!
681 */
682 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
683 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
684 int rc = VINF_SUCCESS;
685 pszName += sizeof("/GVMM/") - 1;
686 if (!strcmp(pszName, "cEMTsMeansCompany"))
687 *pu64Value = pGVMM->cEMTsMeansCompany;
688 else if (!strcmp(pszName, "MinSleepAlone"))
689 *pu64Value = pGVMM->nsMinSleepAlone;
690 else if (!strcmp(pszName, "MinSleepCompany"))
691 *pu64Value = pGVMM->nsMinSleepCompany;
692 else if (!strcmp(pszName, "EarlyWakeUp1"))
693 *pu64Value = pGVMM->nsEarlyWakeUp1;
694 else if (!strcmp(pszName, "EarlyWakeUp2"))
695 *pu64Value = pGVMM->nsEarlyWakeUp2;
696 else
697 rc = VERR_CFGM_VALUE_NOT_FOUND;
698 return rc;
699}
700
701
702/**
703 * Acquire the 'used' lock in shared mode.
704 *
705 * This prevents destruction of the VM while we're in ring-0.
706 *
707 * @returns IPRT status code, see RTSemFastMutexRequest.
708 * @param a_pGVMM The GVMM instance data.
709 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
710 */
711#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
712
713/**
714 * Release the 'used' lock when owning it in shared mode.
715 *
716 * @returns IPRT status code, see RTSemFastMutexRequest.
717 * @param a_pGVMM The GVMM instance data.
718 * @sa GVMMR0_USED_SHARED_LOCK
719 */
720#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
721
722/**
723 * Acquire the 'used' lock in exclusive mode.
724 *
725 * Only use this function when making changes to the used list.
726 *
727 * @returns IPRT status code, see RTSemFastMutexRequest.
728 * @param a_pGVMM The GVMM instance data.
729 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
730 */
731#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
732
733/**
734 * Release the 'used' lock when owning it in exclusive mode.
735 *
736 * @returns IPRT status code, see RTSemFastMutexRelease.
737 * @param a_pGVMM The GVMM instance data.
738 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
739 */
740#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
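/*
 * Editor's pairing sketch for the 'used' lock macros above (illustrative;
 * mirrors their use elsewhere in this file): shared mode for walking the used
 * list, exclusive mode for modifying it.
 *
 *   int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
 *   AssertRC(rc);
 *   // ... read-only walk of the used handle list ...
 *   GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 */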
741
742
743/**
744 * Try acquire the 'create & destroy' lock.
745 *
746 * @returns IPRT status code, see RTSemFastMutexRequest.
747 * @param pGVMM The GVMM instance data.
748 */
749DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
750{
751 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
752 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
753 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
754 return rc;
755}
756
757
758/**
759 * Release the 'create & destroy' lock.
760 *
761 * @returns IPRT status code, see RTSemFastMutexRequest.
762 * @param pGVMM The GVMM instance data.
763 */
764DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
765{
766 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
767 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
768 AssertRC(rc);
769 return rc;
770}
771
772
773/**
774 * Request wrapper for the GVMMR0CreateVM API.
775 *
776 * @returns VBox status code.
777 * @param pReq The request buffer.
778 * @param pSession The session handle. The VM will be associated with this.
779 */
780GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
781{
782 /*
783 * Validate the request.
784 */
785 if (!VALID_PTR(pReq))
786 return VERR_INVALID_POINTER;
787 if (pReq->Hdr.cbReq != sizeof(*pReq))
788 return VERR_INVALID_PARAMETER;
789 if (pReq->pSession != pSession)
790 return VERR_INVALID_POINTER;
791
792 /*
793 * Execute it.
794 */
795 PVMCC pVM;
796 pReq->pVMR0 = NULL;
797 pReq->pVMR3 = NIL_RTR3PTR;
798 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
799 if (RT_SUCCESS(rc))
800 {
801 pReq->pVMR0 = pVM;
802 pReq->pVMR3 = pVM->pVMR3;
803 }
804 return rc;
805}
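/*
 * Editor's note (illustrative, not from this file): a caller-side sketch of
 * preparing the request handled above, using only the fields this function
 * references; the real ring-3 path also fills in the SUPVMMR0REQHDR magic and
 * dispatches the request through the support driver.
 *
 *   GVMMCREATEVMREQ Req;
 *   Req.Hdr.cbReq = sizeof(Req);
 *   Req.pSession  = pSession;
 *   Req.cCpus     = 1;
 *   Req.pVMR3     = NIL_RTR3PTR;
 *   Req.pVMR0     = NULL;
 *   // On success GVMMR0CreateVMReq fills in Req.pVMR0 and Req.pVMR3.
 */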
806
807
808/**
809 * Allocates the VM structure and registers it with GVM.
810 *
811 * The caller will become the VM owner and thereby the EMT.
812 *
813 * @returns VBox status code.
814 * @param pSession The support driver session.
815 * @param cCpus Number of virtual CPUs for the new VM.
816 * @param ppVM Where to store the pointer to the VM structure.
817 *
818 * @thread EMT.
819 */
820GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVMCC *ppVM)
821{
822 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
823 PGVMM pGVMM;
824 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
825
826 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
827 *ppVM = NULL;
828
829 if ( cCpus == 0
830 || cCpus > VMM_MAX_CPU_COUNT)
831 return VERR_INVALID_PARAMETER;
832
833 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
834 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
835 RTPROCESS ProcId = RTProcSelf();
836 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
837
838 /*
839 * The whole allocation process is protected by the lock.
840 */
841 int rc = gvmmR0CreateDestroyLock(pGVMM);
842 AssertRCReturn(rc, rc);
843
844 /*
845 * Only one VM per session.
846 */
847 if (SUPR0GetSessionVM(pSession) != NULL)
848 {
849 gvmmR0CreateDestroyUnlock(pGVMM);
850 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
851 return VERR_ALREADY_EXISTS;
852 }
853
854 /*
855 * Allocate a handle first so we don't waste resources unnecessarily.
856 */
857 uint16_t iHandle = pGVMM->iFreeHead;
858 if (iHandle)
859 {
860 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
861
862 /* consistency checks, a bit paranoid as always. */
863 if ( !pHandle->pVM
864 && !pHandle->pGVM
865 && !pHandle->pvObj
866 && pHandle->iSelf == iHandle)
867 {
868 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
869 if (pHandle->pvObj)
870 {
871 /*
872 * Move the handle from the free to used list and perform permission checks.
873 */
874 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
875 AssertRC(rc);
876
877 pGVMM->iFreeHead = pHandle->iNext;
878 pHandle->iNext = pGVMM->iUsedHead;
879 pGVMM->iUsedHead = iHandle;
880 pGVMM->cVMs++;
881
882 pHandle->pVM = NULL;
883 pHandle->pGVM = NULL;
884 pHandle->pSession = pSession;
885 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
886 pHandle->ProcId = NIL_RTPROCESS;
887
888 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
889
890 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
891 if (RT_SUCCESS(rc))
892 {
893#ifdef VBOX_BUGREF_9217
894 /*
895 * Allocate memory for the VM structure (combined VM + GVM).
896 */
897 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
898 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
899 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
900 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
901 if (RT_SUCCESS(rc))
902 {
903 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
904 AssertPtr(pGVM);
905
906 /*
907 * Initialise the structure.
908 */
909 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
910 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
911 GMMR0InitPerVMData(pGVM);
912 pGVM->gvmm.s.VMMemObj = hVMMemObj;
913
914 /*
915 * Allocate page array.
916 * This currently has to be made available to ring-3, but this should change eventually.
917 */
918 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
919 if (RT_SUCCESS(rc))
920 {
921 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
922 for (uint32_t iPage = 0; iPage < cPages; iPage++)
923 {
924 paPages[iPage].uReserved = 0;
925 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
926 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
927 }
928
929 /*
930 * Map the page array, VM and VMCPU structures into ring-3.
931 */
932 AssertCompileSizeAlignment(VM, PAGE_SIZE);
933 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
934 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
935 0 /*offSub*/, sizeof(VM));
936 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
937 {
938 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
939 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
940 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
941 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
942 }
943 if (RT_SUCCESS(rc))
944 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
945 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
946 NIL_RTR0PROCESS);
947 if (RT_SUCCESS(rc))
948 {
949 /*
950 * Initialize all the VM pointers.
951 */
952 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
953 AssertPtr((void *)pVMR3);
954
955 for (VMCPUID i = 0; i < cCpus; i++)
956 {
957 pGVM->aCpus[i].pVMR0 = pGVM;
958 pGVM->aCpus[i].pVMR3 = pVMR3;
959 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
960 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
961 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
962 AssertPtr((void *)pGVM->apCpusR3[i]);
963 }
964
965 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
966 AssertPtr((void *)pGVM->paVMPagesR3);
967
968 /*
969 * Complete the handle - take the UsedLock sem just to be careful.
970 */
971 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
972 AssertRC(rc);
973
974 pHandle->pVM = pGVM;
975 pHandle->pGVM = pGVM;
976 pHandle->hEMT0 = hEMT0;
977 pHandle->ProcId = ProcId;
978 pGVM->pVMR3 = pVMR3;
979 pGVM->aCpus[0].hEMT = hEMT0;
980 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
981 pGVMM->cEMTs += cCpus;
982
983 /* Associate it with the session and create the context hook for EMT0. */
984 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
985 if (RT_SUCCESS(rc))
986 {
987 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
988 if (RT_SUCCESS(rc))
989 {
990 /*
991 * Done!
992 */
993 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
994
995 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
996 gvmmR0CreateDestroyUnlock(pGVMM);
997
998 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
999
1000 *ppVM = pGVM;
1001 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1002 return VINF_SUCCESS;
1003 }
1004
1005 SUPR0SetSessionVM(pSession, NULL, NULL);
1006 }
1007 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1008 }
1009
1010 /* Cleanup mappings. */
1011 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1012 {
1013 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1014 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1015 }
1016 for (VMCPUID i = 0; i < cCpus; i++)
1017 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1018 {
1019 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1020 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1021 }
1022 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1023 {
1024 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1025 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1026 }
1027 }
1028 }
1029
1030#else
1031 /*
1032 * Allocate the global VM structure (GVM) and initialize it.
1033 */
1034 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
1035 if (pGVM)
1036 {
1037 pGVM->u32Magic = GVM_MAGIC;
1038 pGVM->hSelf = iHandle;
1039 pGVM->pVM = NULL;
1040 pGVM->cCpus = cCpus;
1041 pGVM->pSession = pSession;
1042
1043 gvmmR0InitPerVMData(pGVM);
1044 GMMR0InitPerVMData(pGVM);
1045
1046 /*
1047 * Allocate the shared VM structure and associated page array.
1048 */
1049 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
1050 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
1051 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
1052 if (RT_SUCCESS(rc))
1053 {
1054 PVMCC pVM = (PVMCC)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
1055 memset(pVM, 0, cPages << PAGE_SHIFT);
1056 pVM->enmVMState = VMSTATE_CREATING;
1057 pVM->pVMR0 = pVM;
1058 pVM->pSession = pSession;
1059 pVM->hSelf = iHandle;
1060 pVM->cbSelf = cbVM;
1061 pVM->cCpus = cCpus;
1062 pVM->uCpuExecutionCap = 100; /* default is no cap. */
1063 AssertCompileMemberAlignment(VM, cpum, 64);
1064 AssertCompileMemberAlignment(VM, tm, 64);
1065 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
1066
1067 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
1068 if (RT_SUCCESS(rc))
1069 {
1070 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
1071 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1072 {
1073 paPages[iPage].uReserved = 0;
1074 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
1075 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
1076 }
1077
1078 /*
1079 * Map them into ring-3.
1080 */
1081 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
1082 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
1083 if (RT_SUCCESS(rc))
1084 {
1085 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1086 pVM->pVMR3 = pVMR3;
1087 AssertPtr((void *)pVMR3);
1088
1089 /* Initialize all the VM pointers. */
1090 for (VMCPUID i = 0; i < cCpus; i++)
1091 {
1092 pVM->aCpus[i].idCpu = i;
1093 pVM->aCpus[i].pVMR0 = pVM;
1094 pVM->aCpus[i].pVMR3 = pVMR3;
1095 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1096 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1097 pVM->apCpusR3[i] = pVMR3 + RT_UOFFSETOF_DYN(VM, aCpus[i]);
1098 pVM->apCpusR0[i] = &pVM->aCpus[i];
1099 }
1100
1101 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
1102 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
1103 NIL_RTR0PROCESS);
1104 if (RT_SUCCESS(rc))
1105 {
1106 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1107 AssertPtr((void *)pVM->paVMPagesR3);
1108
1109 /* complete the handle - take the UsedLock sem just to be careful. */
1110 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1111 AssertRC(rc);
1112
1113 pHandle->pVM = pVM;
1114 pHandle->pGVM = pGVM;
1115 pHandle->hEMT0 = hEMT0;
1116 pHandle->ProcId = ProcId;
1117 pGVM->pVM = pVM;
1118 pGVM->pVMR3 = pVMR3;
1119 pGVM->aCpus[0].hEMT = hEMT0;
1120 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
1121 pGVMM->cEMTs += cCpus;
1122
1123 for (VMCPUID i = 0; i < cCpus; i++)
1124 {
1125 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
1126 pGVM->aCpus[i].pVM = pVM;
1127 }
1128
1129 /* Associate it with the session and create the context hook for EMT0. */
1130 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
1131 if (RT_SUCCESS(rc))
1132 {
1133 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
1134 if (RT_SUCCESS(rc))
1135 {
1136 /*
1137 * Done!
1138 */
1139 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
1140
1141 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1142 gvmmR0CreateDestroyUnlock(pGVMM);
1143
1144 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1145
1146 *ppVM = pVM;
1147 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1148 return VINF_SUCCESS;
1149 }
1150
1151 SUPR0SetSessionVM(pSession, NULL, NULL);
1152 }
1153 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1154 }
1155
1156 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1157 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1158 }
1159 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1160 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1161 }
1162 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1163 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1164 }
1165 }
1166#endif
1167 }
1168 /* else: The user wasn't permitted to create this VM. */
1169
1170 /*
1171 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1172 * object reference here. A little extra mess because of non-recursive lock.
1173 */
1174 void *pvObj = pHandle->pvObj;
1175 pHandle->pvObj = NULL;
1176 gvmmR0CreateDestroyUnlock(pGVMM);
1177
1178 SUPR0ObjRelease(pvObj, pSession);
1179
1180 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1181 return rc;
1182 }
1183
1184 rc = VERR_NO_MEMORY;
1185 }
1186 else
1187 rc = VERR_GVMM_IPE_1;
1188 }
1189 else
1190 rc = VERR_GVM_TOO_MANY_VMS;
1191
1192 gvmmR0CreateDestroyUnlock(pGVMM);
1193 return rc;
1194}
1195
1196
1197#ifdef VBOX_BUGREF_9217
1198/**
1199 * Initializes the per VM data belonging to GVMM.
1200 *
1201 * @param pGVM Pointer to the global VM structure.
1202 */
1203static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1204#else
1205/**
1206 * Initializes the per VM data belonging to GVMM.
1207 *
1208 * @param pGVM Pointer to the global VM structure.
1209 */
1210static void gvmmR0InitPerVMData(PGVM pGVM)
1211#endif
1212{
1213 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1214 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1215#ifdef VBOX_BUGREF_9217
1216 AssertCompileMemberAlignment(VM, cpum, 64);
1217 AssertCompileMemberAlignment(VM, tm, 64);
1218
1219 /* GVM: */
1220 pGVM->u32Magic = GVM_MAGIC;
1221 pGVM->hSelf = hSelf;
1222 pGVM->cCpus = cCpus;
1223 pGVM->pSession = pSession;
1224
1225 /* VM: */
1226 pGVM->enmVMState = VMSTATE_CREATING;
1227 pGVM->pSessionUnsafe = pSession;
1228 pGVM->hSelfUnsafe = hSelf;
1229 pGVM->cCpusUnsafe = cCpus;
1230 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1231 pGVM->uStructVersion = 1;
1232 pGVM->cbSelf = sizeof(VM);
1233 pGVM->cbVCpu = sizeof(VMCPU);
1234#endif
1235
1236 /* GVMM: */
1237 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1238 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1239 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1240 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1241 pGVM->gvmm.s.fDoneVMMR0Init = false;
1242 pGVM->gvmm.s.fDoneVMMR0Term = false;
1243
1244 /*
1245 * Per virtual CPU.
1246 */
1247 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1248 {
1249 pGVM->aCpus[i].idCpu = i;
1250#ifdef VBOX_BUGREF_9217
1251 pGVM->aCpus[i].idCpuUnsafe = i;
1252#endif
1253 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1254#ifdef VBOX_BUGREF_9217
1255 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1256#endif
1257 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1258 pGVM->aCpus[i].pGVM = pGVM;
1259#ifndef VBOX_BUGREF_9217
1260 pGVM->aCpus[i].pVCpu = NULL;
1261 pGVM->aCpus[i].pVM = NULL;
1262#endif
1263#ifdef VBOX_BUGREF_9217
1264 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1265 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1266 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1267 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1268 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1269#endif
1270 }
1271}
1272
1273
1274/**
1275 * Does the VM initialization.
1276 *
1277 * @returns VBox status code.
1278 * @param pGVM The global (ring-0) VM structure.
1279 */
1280GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1281{
1282 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1283
1284 int rc = VERR_INTERNAL_ERROR_3;
1285 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1286 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1287 {
1288 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1289 {
1290 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1291 if (RT_FAILURE(rc))
1292 {
1293 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1294 break;
1295 }
1296 }
1297 }
1298 else
1299 rc = VERR_WRONG_ORDER;
1300
1301 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1302 return rc;
1303}
1304
1305
1306/**
1307 * Indicates that we're done with the ring-0 initialization
1308 * of the VM.
1309 *
1310 * @param pGVM The global (ring-0) VM structure.
1311 * @thread EMT(0)
1312 */
1313GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1314{
1315 /* Set the indicator. */
1316 pGVM->gvmm.s.fDoneVMMR0Init = true;
1317}
1318
1319
1320/**
1321 * Indicates that we're doing the ring-0 termination of the VM.
1322 *
1323 * @returns true if termination hasn't been done already, false if it has.
1324 * @param pGVM Pointer to the global VM structure. Optional.
1325 * @thread EMT(0) or session cleanup thread.
1326 */
1327GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1328{
1329 /* Validate the VM structure, state and handle. */
1330 AssertPtrReturn(pGVM, false);
1331
1332 /* Set the indicator. */
1333 if (pGVM->gvmm.s.fDoneVMMR0Term)
1334 return false;
1335 pGVM->gvmm.s.fDoneVMMR0Term = true;
1336 return true;
1337}
1338
1339
1340/**
1341 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1342 *
1343 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1344 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1345 * would've been nice if the caller was actually the EMT thread or if we somehow
1346 * could've associated the calling thread with the VM up front.
1347 *
1348 * @returns VBox status code.
1349 * @param pGVM The global (ring-0) VM structure.
1350 * @param pVM The cross context VM structure.
1351 *
1352 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1353 */
1354GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVMCC pVM)
1355{
1356 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1357 PGVMM pGVMM;
1358 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1359
1360 /*
1361 * Validate the VM structure, state and caller.
1362 */
1363 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1364 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1365 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1366#ifdef VBOX_BUGREF_9217
1367 AssertReturn(pGVM == pVM, VERR_INVALID_POINTER);
1368#else
1369 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1370#endif
1371 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1372 VERR_WRONG_ORDER);
1373
1374 uint32_t hGVM = pGVM->hSelf;
1375 ASMCompilerBarrier();
1376 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1377 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1378
1379 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1380 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1381
1382 RTPROCESS ProcId = RTProcSelf();
1383 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1384 AssertReturn( ( pHandle->hEMT0 == hSelf
1385 && pHandle->ProcId == ProcId)
1386 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1387
1388 /*
1389 * Lookup the handle and destroy the object.
1390 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1391 * object, we take some precautions against racing callers just in case...
1392 */
1393 int rc = gvmmR0CreateDestroyLock(pGVMM);
1394 AssertRC(rc);
1395
1396 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1397 if ( pHandle->pVM == pVM
1398 && ( ( pHandle->hEMT0 == hSelf
1399 && pHandle->ProcId == ProcId)
1400 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1401 && VALID_PTR(pHandle->pvObj)
1402 && VALID_PTR(pHandle->pSession)
1403 && VALID_PTR(pHandle->pGVM)
1404 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1405 {
1406 /* Check that other EMTs have deregistered. */
1407 uint32_t cNotDeregistered = 0;
1408 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1409 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1410 if (cNotDeregistered == 0)
1411 {
1412 /* Grab the object pointer. */
1413 void *pvObj = pHandle->pvObj;
1414 pHandle->pvObj = NULL;
1415 gvmmR0CreateDestroyUnlock(pGVMM);
1416
1417 SUPR0ObjRelease(pvObj, pHandle->pSession);
1418 }
1419 else
1420 {
1421 gvmmR0CreateDestroyUnlock(pGVMM);
1422 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1423 }
1424 }
1425 else
1426 {
1427 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1428 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1429 gvmmR0CreateDestroyUnlock(pGVMM);
1430 rc = VERR_GVMM_IPE_2;
1431 }
1432
1433 return rc;
1434}
1435
1436
1437/**
1438 * Performs VM cleanup task as part of object destruction.
1439 *
1440 * @param pGVM The GVM pointer.
1441 */
1442static void gvmmR0CleanupVM(PGVM pGVM)
1443{
1444 if ( pGVM->gvmm.s.fDoneVMMR0Init
1445 && !pGVM->gvmm.s.fDoneVMMR0Term)
1446 {
1447 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1448#ifdef VBOX_BUGREF_9217
1449 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM
1450#else
1451 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM
1452#endif
1453 )
1454 {
1455 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1456#ifdef VBOX_BUGREF_9217
1457 VMMR0TermVM(pGVM, pGVM, NIL_VMCPUID);
1458#else
1459 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1460#endif
1461 }
1462 else
1463#ifdef VBOX_BUGREF_9217
1464 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1465#else
1466 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1467#endif
1468 }
1469
1470 GMMR0CleanupVM(pGVM);
1471#ifdef VBOX_WITH_NEM_R0
1472 NEMR0CleanupVM(pGVM);
1473#endif
1474
1475 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1476 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1477 {
1478 /** @todo Can we busy wait here for all thread-context hooks to be
1479 * deregistered before releasing (destroying) it? Only until we find a
1480 * solution for not deregistering hooks every time we're leaving HMR0
1481 * context. */
1482#ifdef VBOX_BUGREF_9217
1483 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1484#else
1485 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1486#endif
1487 }
1488}
1489
1490
1491/**
1492 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1493 *
1494 * pvUser1 is the GVM instance pointer.
1495 * pvUser2 is the handle pointer.
1496 */
1497static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1498{
1499 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1500
1501 NOREF(pvObj);
1502
1503 /*
1504 * Some quick, paranoid, input validation.
1505 */
1506 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1507 AssertPtr(pHandle);
1508 PGVMM pGVMM = (PGVMM)pvUser1;
1509 Assert(pGVMM == g_pGVMM);
1510 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1511 if ( !iHandle
1512 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1513 || iHandle != pHandle->iSelf)
1514 {
1515 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1516 return;
1517 }
1518
1519 int rc = gvmmR0CreateDestroyLock(pGVMM);
1520 AssertRC(rc);
1521 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1522 AssertRC(rc);
1523
1524 /*
1525 * This is a tad slow but a doubly linked list is too much hassle.
1526 */
1527 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1528 {
1529 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1530 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1531 gvmmR0CreateDestroyUnlock(pGVMM);
1532 return;
1533 }
1534
1535 if (pGVMM->iUsedHead == iHandle)
1536 pGVMM->iUsedHead = pHandle->iNext;
1537 else
1538 {
1539 uint16_t iPrev = pGVMM->iUsedHead;
1540 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1541 while (iPrev)
1542 {
1543 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1544 {
1545 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1546 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1547 gvmmR0CreateDestroyUnlock(pGVMM);
1548 return;
1549 }
1550 if (RT_UNLIKELY(c-- <= 0))
1551 {
1552 iPrev = 0;
1553 break;
1554 }
1555
1556 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1557 break;
1558 iPrev = pGVMM->aHandles[iPrev].iNext;
1559 }
1560 if (!iPrev)
1561 {
1562 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1563 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1564 gvmmR0CreateDestroyUnlock(pGVMM);
1565 return;
1566 }
1567
1568 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1569 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1570 }
1571 pHandle->iNext = 0;
1572 pGVMM->cVMs--;
1573
1574 /*
1575 * Do the global cleanup round.
1576 */
1577 PGVM pGVM = pHandle->pGVM;
1578 if ( VALID_PTR(pGVM)
1579 && pGVM->u32Magic == GVM_MAGIC)
1580 {
1581 pGVMM->cEMTs -= pGVM->cCpus;
1582
1583 if (pGVM->pSession)
1584 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1585
1586 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1587
1588 gvmmR0CleanupVM(pGVM);
1589
1590 /*
1591 * Do the GVMM cleanup - must be done last.
1592 */
1593 /* The VM and VM pages mappings/allocations. */
1594 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1595 {
1596 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1597 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1598 }
1599
1600 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1601 {
1602 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1603 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1604 }
1605
1606 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1607 {
1608 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1609 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1610 }
1611
1612#ifndef VBOX_BUGREF_9217
1613 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1614 {
1615 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1616 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1617 }
1618#endif
1619
1620 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1621 {
1622 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1623 {
1624 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1625 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1626 }
1627#ifdef VBOX_BUGREF_9217
1628 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1629 {
1630 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1631 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1632 }
1633#endif
1634 }
1635
1636 /* the GVM structure itself. */
1637 pGVM->u32Magic |= UINT32_C(0x80000000);
1638#ifdef VBOX_BUGREF_9217
1639 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1640 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1641#else
1642 RTMemFree(pGVM);
1643#endif
1644 pGVM = NULL;
1645
1646 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1647 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1648 AssertRC(rc);
1649 }
1650 /* else: GVMMR0CreateVM cleanup. */
1651
1652 /*
1653 * Free the handle.
1654 */
1655 pHandle->iNext = pGVMM->iFreeHead;
1656 pGVMM->iFreeHead = iHandle;
1657 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1658 ASMAtomicWriteNullPtr(&pHandle->pVM);
1659 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1660 ASMAtomicWriteNullPtr(&pHandle->pSession);
1661 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1662 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1663
1664 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1665 gvmmR0CreateDestroyUnlock(pGVMM);
1666 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1667}
1668
1669
1670/**
1671 * Registers the calling thread as the EMT of a Virtual CPU.
1672 *
1673 * Note that VCPU 0 is automatically registered during VM creation.
1674 *
1675 * @returns VBox status code
1676 * @param pGVM The global (ring-0) VM structure.
1677 * @param pVM The cross context VM structure.
1678 * @param idCpu VCPU id to register the current thread as.
1679 */
1680GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
1681{
1682 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1683
1684 /*
1685 * Validate the VM structure, state and handle.
1686 */
1687 PGVMM pGVMM;
1688 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1689 if (RT_SUCCESS(rc))
1690 {
1691 if (idCpu < pGVM->cCpus)
1692 {
1693 /* Check that the EMT isn't already assigned to a thread. */
1694 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1695 {
1696#ifdef VBOX_BUGREF_9217
1697 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1698#else
1699 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1700#endif
1701
1702 /* A thread may only be one EMT. */
1703 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1704 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1705 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1706 if (RT_SUCCESS(rc))
1707 {
1708 /*
1709 * Do the assignment, then try setup the hook. Undo if that fails.
1710 */
1711#ifdef VBOX_BUGREF_9217
1712 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1713
1714 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1715 if (RT_SUCCESS(rc))
1716 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1717 else
1718 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1719#else
1720 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1721
1722 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1723 if (RT_SUCCESS(rc))
1724 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1725 else
1726 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1727#endif
1728 }
1729 }
1730 else
1731 rc = VERR_ACCESS_DENIED;
1732 }
1733 else
1734 rc = VERR_INVALID_CPU_ID;
1735 }
1736 return rc;
1737}
1738
1739
1740/**
1741 * Deregisters the calling thread as the EMT of a Virtual CPU.
1742 *
1743 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1744 *
1745 * @returns VBox status code
1746 * @param pGVM The global (ring-0) VM structure.
1747 * @param pVM The cross context VM structure.
1748 * @param idCpu         VCPU id to deregister the current thread as.
1749 */
1750GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
1751{
1752 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1753
1754 /*
1755 * Validate the VM structure, state and handle.
1756 */
1757 PGVMM pGVMM;
1758 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1759 if (RT_SUCCESS(rc))
1760 {
1761 /*
1762 * Take the destruction lock and recheck the handle state to
1763 * prevent racing GVMMR0DestroyVM.
1764 */
1765 gvmmR0CreateDestroyLock(pGVMM);
1766 uint32_t hSelf = pGVM->hSelf;
1767 ASMCompilerBarrier();
1768 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1769 && pGVMM->aHandles[hSelf].pvObj != NULL
1770 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1771 {
1772 /*
1773 * Do per-EMT cleanups.
1774 */
1775#ifdef VBOX_BUGREF_9217
1776 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1777#else
1778 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1779#endif
1780
1781 /*
1782 * Invalidate hEMT. We don't use NIL here as that would allow
1783 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1784 */
1785 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1786 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1787#ifdef VBOX_BUGREF_9217
1788 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1789#else
1790 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1791#endif
1792 }
1793
1794 gvmmR0CreateDestroyUnlock(pGVMM);
1795 }
1796 return rc;
1797}
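/* Illustrative sketch (added for clarity, not part of the original source): how an
 * additional EMT could claim and later release a virtual CPU using the two APIs above.
 * The worker function name is hypothetical; in the real code these calls are reached
 * through the VMMR0 request dispatcher rather than invoked directly.  Note that idCpu
 * must be non-zero here, since VCPU 0 is registered automatically during VM creation.
 * @code
 *  static int emtWorkerExample(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
 *  {
 *      int rc = GVMMR0RegisterVCpu(pGVM, pVM, idCpu);    // claim the calling thread as EMT(idCpu)
 *      if (RT_SUCCESS(rc))
 *      {
 *          // ... perform the EMT work for idCpu ...
 *          rc = GVMMR0DeregisterVCpu(pGVM, pVM, idCpu);  // release the claim during teardown
 *      }
 *      return rc;
 *  }
 * @endcode
 */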
1798
1799
1800/**
1801 * Lookup a GVM structure by its handle.
1802 *
1803 * @returns The GVM pointer on success, NULL on failure.
1804 * @param hGVM The global VM handle. Asserts on bad handle.
1805 */
1806GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1807{
1808 PGVMM pGVMM;
1809 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1810
1811 /*
1812 * Validate.
1813 */
1814 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1815 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1816
1817 /*
1818 * Look it up.
1819 */
1820 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1821 AssertPtrReturn(pHandle->pVM, NULL);
1822 AssertPtrReturn(pHandle->pvObj, NULL);
1823 PGVM pGVM = pHandle->pGVM;
1824 AssertPtrReturn(pGVM, NULL);
1825#ifdef VBOX_BUGREF_9217
1826 AssertReturn(pGVM == pHandle->pVM, NULL);
1827#else
1828 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1829#endif
1830
1831 return pHandle->pGVM;
1832}
1833
1834
1835/**
1836 * Lookup a GVM structure by the shared VM structure.
1837 *
1838 * The calling thread must be in the same process as the VM. All current lookups
1839 * are by threads inside the same process, so this will not be an issue.
1840 *
1841 * @returns VBox status code.
1842 * @param pVM The cross context VM structure.
1843 * @param ppGVM Where to store the GVM pointer.
1844 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1845 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1846 * shared mode when requested.
1847 *
1848 * Be very careful if not taking the lock as it's
1849 * possible that the VM will disappear then!
1850 *
1851 * @remark This will not assert on an invalid pVM but try return silently.
1852 */
1853static int gvmmR0ByVM(PVMCC pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1854{
1855 RTPROCESS ProcId = RTProcSelf();
1856 PGVMM pGVMM;
1857 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1858
1859 /*
1860 * Validate.
1861 */
1862 if (RT_UNLIKELY( !VALID_PTR(pVM)
1863 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1864 return VERR_INVALID_POINTER;
1865 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1866 || pVM->enmVMState >= VMSTATE_TERMINATED))
1867 return VERR_INVALID_POINTER;
1868
1869 uint16_t hGVM = pVM->hSelf;
1870 ASMCompilerBarrier();
1871 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1872 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1873 return VERR_INVALID_HANDLE;
1874
1875 /*
1876 * Look it up.
1877 */
1878 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1879 PGVM pGVM;
1880 if (fTakeUsedLock)
1881 {
1882 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1883 AssertRCReturn(rc, rc);
1884
1885 pGVM = pHandle->pGVM;
1886#ifdef VBOX_BUGREF_9217
1887 if (RT_UNLIKELY( pHandle->pVM != pVM
1888 || pHandle->ProcId != ProcId
1889 || !VALID_PTR(pHandle->pvObj)
1890 || !VALID_PTR(pGVM)
1891 || pGVM != pVM))
1892#else
1893 if (RT_UNLIKELY( pHandle->pVM != pVM
1894 || pHandle->ProcId != ProcId
1895 || !VALID_PTR(pHandle->pvObj)
1896 || !VALID_PTR(pGVM)
1897 || pGVM->pVM != pVM))
1898#endif
1899 {
1900 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1901 return VERR_INVALID_HANDLE;
1902 }
1903 }
1904 else
1905 {
1906 if (RT_UNLIKELY(pHandle->pVM != pVM))
1907 return VERR_INVALID_HANDLE;
1908 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1909 return VERR_INVALID_HANDLE;
1910 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1911 return VERR_INVALID_HANDLE;
1912
1913 pGVM = pHandle->pGVM;
1914 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1915 return VERR_INVALID_HANDLE;
1916#ifdef VBOX_BUGREF_9217
1917 if (RT_UNLIKELY(pGVM != pVM))
1918#else
1919 if (RT_UNLIKELY(pGVM->pVM != pVM))
1920#endif
1921 return VERR_INVALID_HANDLE;
1922 }
1923
1924 *ppGVM = pGVM;
1925 *ppGVMM = pGVMM;
1926 return VINF_SUCCESS;
1927}
1928
1929
1930/**
1931 * Fast look up a GVM structure by the cross context VM structure.
1932 *
1933 * This is mainly used as a glue function, so performance is not a primary concern.
1934 *
1935 * @returns GVM on success, NULL on failure.
1936 * @param pVM The cross context VM structure. ASSUMES to be
1937 * reasonably valid, so we can do fewer checks than in
1938 * gvmmR0ByVM.
1939 *
1940 * @note Do not use this on pVM structures from userland!
1941 */
1942GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVMCC pVM)
1943{
1944 AssertPtr(pVM);
1945 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1946
1947 PGVMM pGVMM;
1948 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1949
1950 /*
1951 * Validate.
1952 */
1953 uint16_t hGVM = pVM->hSelf;
1954 ASMCompilerBarrier();
1955 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1956
1957 /*
1958 * Look it up and check pVM against the value in the handle and GVM structures.
1959 */
1960 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1961 AssertReturn(pHandle->pVM == pVM, NULL);
1962
1963 PGVM pGVM = pHandle->pGVM;
1964 AssertPtrReturn(pGVM, NULL);
1965#ifdef VBOX_BUGREF_9217
1966 AssertReturn(pGVM == pVM, NULL);
1967#else
1968 AssertReturn(pGVM->pVM == pVM, NULL);
1969#endif
1970
1971 return pGVM;
1972}
1973
1974
1975/**
1976 * Check that the given GVM and VM structures match up.
1977 *
1978 * The calling thread must be in the same process as the VM. All current lookups
1979 * are by threads inside the same process, so this will not be an issue.
1980 *
1981 * @returns VBox status code.
1982 * @param pGVM The global (ring-0) VM structure.
1983 * @param pVM The cross context VM structure.
1984 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1985 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1986 * shared mode when requested.
1987 *
1988 * Be very careful if not taking the lock as it's
1989 * possible that the VM will disappear then!
1990 *
1991 * @remark This will not assert on an invalid pVM but try return silently.
1992 */
1993static int gvmmR0ByGVMandVM(PGVM pGVM, PVMCC pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1994{
1995 /*
1996 * Check the pointers.
1997 */
1998 int rc;
1999 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
2000 {
2001 if (RT_LIKELY( RT_VALID_PTR(pVM)
2002 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
2003 {
2004#ifdef VBOX_BUGREF_9217
2005 if (RT_LIKELY(pGVM == pVM))
2006#else
2007 if (RT_LIKELY(pGVM->pVM == pVM))
2008#endif
2009 {
2010 /*
2011 * Get the pGVMM instance and check the VM handle.
2012 */
2013 PGVMM pGVMM;
2014 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2015
2016 uint16_t hGVM = pGVM->hSelf;
2017 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
2018 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
2019 {
2020 RTPROCESS const pidSelf = RTProcSelf();
2021 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2022 if (fTakeUsedLock)
2023 {
2024 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2025 AssertRCReturn(rc, rc);
2026 }
2027
2028 if (RT_LIKELY( pHandle->pGVM == pGVM
2029 && pHandle->pVM == pVM
2030 && pHandle->ProcId == pidSelf
2031 && RT_VALID_PTR(pHandle->pvObj)))
2032 {
2033 /*
2034 * Some more VM data consistency checks.
2035 */
2036#ifdef VBOX_BUGREF_9217
2037 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
2038 && pVM->hSelf == hGVM
2039 && pVM->enmVMState >= VMSTATE_CREATING
2040 && pVM->enmVMState <= VMSTATE_TERMINATED
2041 && pVM->pSelf == pVM
2042 ))
2043#else
2044 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
2045 && pVM->hSelf == hGVM
2046 && pVM->enmVMState >= VMSTATE_CREATING
2047 && pVM->enmVMState <= VMSTATE_TERMINATED
2048 && pVM->pVMR0 == pVM
2049 ))
2050#endif
2051 {
2052 *ppGVMM = pGVMM;
2053 return VINF_SUCCESS;
2054 }
2055 }
2056
2057 if (fTakeUsedLock)
2058 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2059 }
2060 }
2061 rc = VERR_INVALID_VM_HANDLE;
2062 }
2063 else
2064 rc = VERR_INVALID_POINTER;
2065 }
2066 else
2067 rc = VERR_INVALID_POINTER;
2068 return rc;
2069}
2070
2071
2072/**
2073 * Check that the given GVM and VM structures match up.
2074 *
2075 * The calling thread must be in the same process as the VM. All current lookups
2076 * are by threads inside the same process, so this will not be an issue.
2077 *
2078 * @returns VBox status code.
2079 * @param pGVM The global (ring-0) VM structure.
2080 * @param pVM The cross context VM structure.
2081 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2082 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2083 * @thread EMT
2084 *
2085 * @remarks This will assert in all failure paths.
2086 */
2087static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, PGVMM *ppGVMM)
2088{
2089 /*
2090 * Check the pointers.
2091 */
2092 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2093
2094 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2095 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2096#ifdef VBOX_BUGREF_9217
2097 AssertReturn(pGVM == pVM, VERR_INVALID_VM_HANDLE);
2098#else
2099 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
2100#endif
2101
2102
2103 /*
2104 * Get the pGVMM instance and check the VM handle.
2105 */
2106 PGVMM pGVMM;
2107 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2108
2109 uint16_t hGVM = pGVM->hSelf;
2110 ASMCompilerBarrier();
2111 AssertReturn( hGVM != NIL_GVM_HANDLE
2112 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2113
2114 RTPROCESS const pidSelf = RTProcSelf();
2115 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2116 AssertReturn( pHandle->pGVM == pGVM
2117 && pHandle->pVM == pVM
2118 && pHandle->ProcId == pidSelf
2119 && RT_VALID_PTR(pHandle->pvObj),
2120 VERR_INVALID_HANDLE);
2121
2122 /*
2123 * Check the EMT claim.
2124 */
2125 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2126 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2127 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2128
2129 /*
2130 * Some more VM data consistency checks.
2131 */
2132 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2133 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2134#ifndef VBOX_BUGREF_9217
2135 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
2136#endif
2137 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
2138 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2139
2140 *ppGVMM = pGVMM;
2141 return VINF_SUCCESS;
2142}
2143
2144
2145/**
2146 * Validates a GVM/VM pair.
2147 *
2148 * @returns VBox status code.
2149 * @param pGVM The global (ring-0) VM structure.
2150 * @param pVM The cross context VM structure.
2151 */
2152GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVMCC pVM)
2153{
2154 PGVMM pGVMM;
2155 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
2156}
2157
2158
2159
2160/**
2161 * Validates a GVM/VM/EMT combo.
2162 *
2163 * @returns VBox status code.
2164 * @param pGVM The global (ring-0) VM structure.
2165 * @param pVM The cross context VM structure.
2166 * @param idCpu The Virtual CPU ID of the calling EMT.
2167 * @thread EMT(idCpu)
2168 */
2169GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
2170{
2171 PGVMM pGVMM;
2172 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2173}
2174
2175
2176/**
2177 * Looks up the VM belonging to the specified EMT thread.
2178 *
2179 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2180 * unnecessary kernel panics when the EMT thread hits an assertion. The
2181 * call may or not be an EMT thread.
2182 *
2183 * @returns Pointer to the VM on success, NULL on failure.
2184 * @param hEMT The native thread handle of the EMT.
2185 * NIL_RTNATIVETHREAD means the current thread
2186 */
2187GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2188{
2189 /*
2190 * No Assertions here as we're usually called in a AssertMsgN or
2191 * RTAssert* context.
2192 */
2193 PGVMM pGVMM = g_pGVMM;
2194 if ( !VALID_PTR(pGVMM)
2195 || pGVMM->u32Magic != GVMM_MAGIC)
2196 return NULL;
2197
2198 if (hEMT == NIL_RTNATIVETHREAD)
2199 hEMT = RTThreadNativeSelf();
2200 RTPROCESS ProcId = RTProcSelf();
2201
2202 /*
2203 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2204 */
2205/** @todo introduce some pid hash table here, please. */
2206 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2207 {
2208 if ( pGVMM->aHandles[i].iSelf == i
2209 && pGVMM->aHandles[i].ProcId == ProcId
2210 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2211 && VALID_PTR(pGVMM->aHandles[i].pVM)
2212 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2213 {
2214 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2215 return pGVMM->aHandles[i].pVM;
2216
2217            /* This is fairly safe with the current process-per-VM approach. */
2218 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2219 VMCPUID const cCpus = pGVM->cCpus;
2220 ASMCompilerBarrier();
2221 if ( cCpus < 1
2222 || cCpus > VMM_MAX_CPU_COUNT)
2223 continue;
2224 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2225 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2226 return pGVMM->aHandles[i].pVM;
2227 }
2228 }
2229 return NULL;
2230}
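/* Illustrative use (added, not part of the original source): assertion and logging code
 * can resolve the calling EMT's VM without taking any GVMM locks, e.g.:
 * @code
 *  PVMCC pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);   // NIL_RTNATIVETHREAD = current thread
 *  if (pVM)
 *  {
 *      // ... include some VM state in the assertion or log message ...
 *  }
 * @endcode
 */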
2231
2232
2233/**
2234 * Looks up the GVMCPU belonging to the specified EMT thread.
2235 *
2236 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2237 * unnecessary kernel panics when the EMT thread hits an assertion. The
2238 * caller may or may not be an EMT thread.
2239 *
2240 * @returns Pointer to the VCPU on success, NULL on failure.
2241 * @param hEMT The native thread handle of the EMT.
2242 * NIL_RTNATIVETHREAD means the current thread
2243 */
2244GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2245{
2246 /*
2247 * No Assertions here as we're usually called in a AssertMsgN,
2248 * RTAssert*, Log and LogRel contexts.
2249 */
2250 PGVMM pGVMM = g_pGVMM;
2251 if ( !VALID_PTR(pGVMM)
2252 || pGVMM->u32Magic != GVMM_MAGIC)
2253 return NULL;
2254
2255 if (hEMT == NIL_RTNATIVETHREAD)
2256 hEMT = RTThreadNativeSelf();
2257 RTPROCESS ProcId = RTProcSelf();
2258
2259 /*
2260 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2261 */
2262/** @todo introduce some pid hash table here, please. */
2263 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2264 {
2265 if ( pGVMM->aHandles[i].iSelf == i
2266 && pGVMM->aHandles[i].ProcId == ProcId
2267 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2268 && VALID_PTR(pGVMM->aHandles[i].pVM)
2269 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2270 {
2271 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2272 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2273 return &pGVM->aCpus[0];
2274
2275            /* This is fairly safe with the current process-per-VM approach. */
2276 VMCPUID const cCpus = pGVM->cCpus;
2277 ASMCompilerBarrier();
2278 ASMCompilerBarrier();
2279 if ( cCpus < 1
2280 || cCpus > VMM_MAX_CPU_COUNT)
2281 continue;
2282 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2283 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2284 return &pGVM->aCpus[idCpu];
2285 }
2286 }
2287 return NULL;
2288}
2289
2290
2291/**
2292 * This will wake up expired and soon-to-be-expired VMs.
2293 *
2294 * @returns The number of EMTs that have been woken up.
2295 * @param pGVMM Pointer to the GVMM instance data.
2296 * @param u64Now The current time.
2297 */
2298static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2299{
2300 /*
2301     * Skip this if it has been disabled because of high resolution wakeups or by
2302     * the user.
2303 */
2304 if (!pGVMM->fDoEarlyWakeUps)
2305 return 0;
2306
2307/** @todo Rewrite this algorithm. See performance defect XYZ. */
2308
2309 /*
2310 * A cheap optimization to stop wasting so much time here on big setups.
2311 */
2312 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2313 if ( pGVMM->cHaltedEMTs == 0
2314 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2315 return 0;
2316
2317 /*
2318 * Only one thread doing this at a time.
2319 */
2320 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2321 return 0;
2322
2323 /*
2324 * The first pass will wake up VMs which have actually expired
2325 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2326 */
2327 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2328 uint64_t u64Min = UINT64_MAX;
2329 unsigned cWoken = 0;
2330 unsigned cHalted = 0;
2331 unsigned cTodo2nd = 0;
2332 unsigned cTodo3rd = 0;
2333 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2334 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2335 i = pGVMM->aHandles[i].iNext)
2336 {
2337 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2338 if ( VALID_PTR(pCurGVM)
2339 && pCurGVM->u32Magic == GVM_MAGIC)
2340 {
2341 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2342 {
2343 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2344 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2345 if (u64)
2346 {
2347 if (u64 <= u64Now)
2348 {
2349 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2350 {
2351 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2352 AssertRC(rc);
2353 cWoken++;
2354 }
2355 }
2356 else
2357 {
2358 cHalted++;
2359 if (u64 <= uNsEarlyWakeUp1)
2360 cTodo2nd++;
2361 else if (u64 <= uNsEarlyWakeUp2)
2362 cTodo3rd++;
2363 else if (u64 < u64Min)
2364                            u64Min = u64;
2365 }
2366 }
2367 }
2368 }
2369 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2370 }
2371
2372 if (cTodo2nd)
2373 {
2374 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2375 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2376 i = pGVMM->aHandles[i].iNext)
2377 {
2378 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2379 if ( VALID_PTR(pCurGVM)
2380 && pCurGVM->u32Magic == GVM_MAGIC)
2381 {
2382 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2383 {
2384 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2385 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2386 if ( u64
2387 && u64 <= uNsEarlyWakeUp1)
2388 {
2389 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2390 {
2391 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2392 AssertRC(rc);
2393 cWoken++;
2394 }
2395 }
2396 }
2397 }
2398 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2399 }
2400 }
2401
2402 if (cTodo3rd)
2403 {
2404 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2405 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2406 i = pGVMM->aHandles[i].iNext)
2407 {
2408 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2409 if ( VALID_PTR(pCurGVM)
2410 && pCurGVM->u32Magic == GVM_MAGIC)
2411 {
2412 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2413 {
2414 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2415 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2416 if ( u64
2417 && u64 <= uNsEarlyWakeUp2)
2418 {
2419 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2420 {
2421 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2422 AssertRC(rc);
2423 cWoken++;
2424 }
2425 }
2426 }
2427 }
2428 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2429 }
2430 }
2431
2432 /*
2433 * Set the minimum value.
2434 */
2435 pGVMM->uNsNextEmtWakeup = u64Min;
2436
2437 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2438 return cWoken;
2439}
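/* Worked example (added for clarity, using made-up configuration values): assume u64Now
 * is 1 000 000 000 ns, nsEarlyWakeUp1 is 25 000 ns and nsEarlyWakeUp2 is 50 000 ns.  The
 * first pass wakes every halted EMT whose u64HaltExpire is <= 1 000 000 000; the second
 * pass additionally wakes those expiring by 1 000 025 000; the third those expiring by
 * 1 000 050 000.  EMTs expiring later only contribute to u64Min, which becomes the
 * uNsNextEmtWakeup threshold used by the cheap early-out at the top of this function. */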
2440
2441
2442/**
2443 * Halt the EMT thread.
2444 *
2445 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2446 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2447 * @param pGVM The global (ring-0) VM structure.
2448 * @param pVM The cross context VM structure.
2449 * @param pGVCpu The global (ring-0) CPU structure of the calling
2450 * EMT.
2451 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2452 * @thread EMT(pGVCpu).
2453 */
2454GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVMCC pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2455{
2456 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2457 pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2458 GVMM_CHECK_SMAP_SETUP();
2459 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2460
2461 PGVMM pGVMM;
2462 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2463
2464 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2465 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2466
2467 /*
2468 * If we're doing early wake-ups, we must take the UsedList lock before we
2469 * start querying the current time.
2470 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2471 */
2472 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2473 if (fDoEarlyWakeUps)
2474 {
2475 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2476 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2477 }
2478
2479 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2480
2481    /* GIP hack: We might be frequently sleeping for short intervals where the
2482 difference between GIP and system time matters on systems with high resolution
2483 system time. So, convert the input from GIP to System time in that case. */
2484 Assert(ASMGetFlags() & X86_EFL_IF);
2485 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2486 const uint64_t u64NowGip = RTTimeNanoTS();
2487 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2488
2489 if (fDoEarlyWakeUps)
2490 {
2491 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2492 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2493 }
2494
2495 /*
2496 * Go to sleep if we must...
2497 * Cap the sleep time to 1 second to be on the safe side.
2498 */
2499 int rc;
2500 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2501 if ( u64NowGip < u64ExpireGipTime
2502 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2503 ? pGVMM->nsMinSleepCompany
2504 : pGVMM->nsMinSleepAlone))
2505 {
2506 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2507 if (cNsInterval > RT_NS_1SEC)
2508 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2509 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2510 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2511 if (fDoEarlyWakeUps)
2512 {
2513 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2514 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2515 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2516 }
2517 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2518
2519 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2520 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2521 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2522 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2523
2524 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2525 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2526
2527        /* Reset the semaphore to try to prevent a few false wake-ups. */
2528 if (rc == VINF_SUCCESS)
2529 {
2530 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2531 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2532 }
2533 else if (rc == VERR_TIMEOUT)
2534 {
2535 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2536 rc = VINF_SUCCESS;
2537 }
2538 }
2539 else
2540 {
2541 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2542 if (fDoEarlyWakeUps)
2543 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2544 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2545 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2546 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2547 rc = VINF_SUCCESS;
2548 }
2549
2550 return rc;
2551}
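/* Illustrative sketch (added, not part of the original source): a simplified halt /
 * wake-up round trip.  The EMT blocks with an absolute GIP deadline while some other
 * thread wakes it early; error handling and the surrounding halt-loop logic are omitted.
 * @code
 *  // On EMT(idCpu): sleep for roughly 1 ms of GIP time (capped internally at 1 second).
 *  uint64_t const u64Deadline = RTTimeNanoTS() + RT_NS_1SEC / 1000;
 *  int rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64Deadline);
 *
 *  // On any other thread: kick EMT(idCpu) so it re-evaluates pending work.
 *  GVMMR0SchedWakeUp(pGVM, pVM, idCpu);
 * @endcode
 */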
2552
2553
2554/**
2555 * Halt the EMT thread.
2556 *
2557 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2558 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2559 * @param pGVM The global (ring-0) VM structure.
2560 * @param pVM The cross context VM structure.
2561 * @param idCpu The Virtual CPU ID of the calling EMT.
2562 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2563 * @thread EMT(idCpu).
2564 */
2565GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2566{
2567 GVMM_CHECK_SMAP_SETUP();
2568 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2569 PGVMM pGVMM;
2570 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2571 if (RT_SUCCESS(rc))
2572 {
2573 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2574 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2575 }
2576 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2577 return rc;
2578}
2579
2580
2581
2582/**
2583 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2584 * a sleeping EMT.
2585 *
2586 * @retval VINF_SUCCESS if successfully woken up.
2587 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2588 *
2589 * @param pGVM The global (ring-0) VM structure.
2590 * @param pGVCpu The global (ring-0) VCPU structure.
2591 */
2592DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2593{
2594 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2595
2596 /*
2597     * Signal the semaphore regardless of whether it's currently blocked on it.
2598 *
2599 * The reason for this is that there is absolutely no way we can be 100%
2600     * certain that it isn't *about* to go to sleep on it and just got
2601     * delayed a bit en route. So, we will always signal the semaphore when
2602     * it is flagged as halted in the VMM.
2603 */
2604/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2605 int rc;
2606 if (pGVCpu->gvmm.s.u64HaltExpire)
2607 {
2608 rc = VINF_SUCCESS;
2609 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2610 }
2611 else
2612 {
2613 rc = VINF_GVM_NOT_BLOCKED;
2614 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2615 }
2616
2617 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2618 AssertRC(rc2);
2619
2620 return rc;
2621}
2622
2623
2624/**
2625 * Wakes up the halted EMT thread so it can service a pending request.
2626 *
2627 * @returns VBox status code.
2628 * @retval VINF_SUCCESS if successfully woken up.
2629 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2630 *
2631 * @param pGVM The global (ring-0) VM structure.
2632 * @param pVM The cross context VM structure.
2633 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2634 * @param fTakeUsedLock Take the used lock or not
2635 * @thread Any but EMT(idCpu).
2636 */
2637GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, bool fTakeUsedLock)
2638{
2639 GVMM_CHECK_SMAP_SETUP();
2640 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2641
2642 /*
2643 * Validate input and take the UsedLock.
2644 */
2645 PGVMM pGVMM;
2646 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2647 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2648 if (RT_SUCCESS(rc))
2649 {
2650 if (idCpu < pGVM->cCpus)
2651 {
2652 /*
2653 * Do the actual job.
2654 */
2655 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2656 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2657
2658 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2659 {
2660 /*
2661 * While we're here, do a round of scheduling.
2662 */
2663 Assert(ASMGetFlags() & X86_EFL_IF);
2664 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2665 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2666 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2667 }
2668 }
2669 else
2670 rc = VERR_INVALID_CPU_ID;
2671
2672 if (fTakeUsedLock)
2673 {
2674 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2675 AssertRC(rc2);
2676 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2677 }
2678 }
2679
2680 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2681 return rc;
2682}
2683
2684
2685/**
2686 * Wakes up the halted EMT thread so it can service a pending request.
2687 *
2688 * @returns VBox status code.
2689 * @retval VINF_SUCCESS if successfully woken up.
2690 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2691 *
2692 * @param pGVM The global (ring-0) VM structure.
2693 * @param pVM The cross context VM structure.
2694 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2695 * @thread Any but EMT(idCpu).
2696 */
2697GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
2698{
2699 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2700}
2701
2702
2703/**
2704 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2705 * parameter and no used locking.
2706 *
2707 * @returns VBox status code.
2708 * @retval VINF_SUCCESS if successfully woken up.
2709 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2710 *
2711 * @param pVM The cross context VM structure.
2712 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2713 * @thread Any but EMT(idCpu).
2714 * @deprecated Don't use in new code if possible! Use the GVM variant.
2715 */
2716GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVMCC pVM, VMCPUID idCpu)
2717{
2718 GVMM_CHECK_SMAP_SETUP();
2719 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2720 PGVM pGVM;
2721 PGVMM pGVMM;
2722 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2723 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2724 if (RT_SUCCESS(rc))
2725 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2726 return rc;
2727}
2728
2729
2730/**
2731 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2732 * the Virtual CPU if it's still busy executing guest code.
2733 *
2734 * @returns VBox status code.
2735 * @retval VINF_SUCCESS if poked successfully.
2736 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2737 *
2738 * @param pGVM The global (ring-0) VM structure.
2739 * @param pVCpu The cross context virtual CPU structure.
2740 */
2741DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2742{
2743 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2744
2745 RTCPUID idHostCpu = pVCpu->idHostCpu;
2746 if ( idHostCpu == NIL_RTCPUID
2747 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2748 {
2749 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2750 return VINF_GVM_NOT_BUSY_IN_GC;
2751 }
2752
2753 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2754 RTMpPokeCpu(idHostCpu);
2755 return VINF_SUCCESS;
2756}
2757
2758
2759/**
2760 * Pokes an EMT if it's still busy running guest code.
2761 *
2762 * @returns VBox status code.
2763 * @retval VINF_SUCCESS if poked successfully.
2764 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2765 *
2766 * @param pGVM The global (ring-0) VM structure.
2767 * @param pVM The cross context VM structure.
2768 * @param idCpu The ID of the virtual CPU to poke.
2769 * @param fTakeUsedLock Take the used lock or not
2770 */
2771GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, bool fTakeUsedLock)
2772{
2773 /*
2774 * Validate input and take the UsedLock.
2775 */
2776 PGVMM pGVMM;
2777 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2778 if (RT_SUCCESS(rc))
2779 {
2780 if (idCpu < pGVM->cCpus)
2781#ifdef VBOX_BUGREF_9217
2782 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2783#else
2784 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2785#endif
2786 else
2787 rc = VERR_INVALID_CPU_ID;
2788
2789 if (fTakeUsedLock)
2790 {
2791 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2792 AssertRC(rc2);
2793 }
2794 }
2795
2796    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2797 return rc;
2798}
2799
2800
2801/**
2802 * Pokes an EMT if it's still busy running guest code.
2803 *
2804 * @returns VBox status code.
2805 * @retval VINF_SUCCESS if poked successfully.
2806 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2807 *
2808 * @param pGVM The global (ring-0) VM structure.
2809 * @param pVM The cross context VM structure.
2810 * @param idCpu The ID of the virtual CPU to poke.
2811 */
2812GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVMCC pVM, VMCPUID idCpu)
2813{
2814 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2815}
2816
2817
2818/**
2819 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2820 * used locking.
2821 *
2822 * @returns VBox status code.
2823 * @retval VINF_SUCCESS if poked successfully.
2824 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2825 *
2826 * @param pVM The cross context VM structure.
2827 * @param idCpu The ID of the virtual CPU to poke.
2828 *
2829 * @deprecated Don't use in new code if possible! Use the GVM variant.
2830 */
2831GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVMCC pVM, VMCPUID idCpu)
2832{
2833 PGVM pGVM;
2834 PGVMM pGVMM;
2835 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2836 if (RT_SUCCESS(rc))
2837 {
2838 if (idCpu < pGVM->cCpus)
2839#ifdef VBOX_BUGREF_9217
2840 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2841#else
2842 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2843#endif
2844 else
2845 rc = VERR_INVALID_CPU_ID;
2846 }
2847 return rc;
2848}
2849
2850
2851/**
2852 * Wakes up a set of halted EMT threads so they can service pending requests.
2853 *
2854 * @returns VBox status code, no informational stuff.
2855 *
2856 * @param pGVM The global (ring-0) VM structure.
2857 * @param pVM The cross context VM structure.
2858 * @param pSleepSet The set of sleepers to wake up.
2859 * @param pPokeSet The set of CPUs to poke.
2860 */
2861GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVMCC pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2862{
2863 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2864 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2865 GVMM_CHECK_SMAP_SETUP();
2866 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2867 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2868
2869 /*
2870 * Validate input and take the UsedLock.
2871 */
2872 PGVMM pGVMM;
2873 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2874 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2875 if (RT_SUCCESS(rc))
2876 {
2877 rc = VINF_SUCCESS;
2878 VMCPUID idCpu = pGVM->cCpus;
2879 while (idCpu-- > 0)
2880 {
2881            /* Don't try to poke or wake up ourselves. */
2882 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2883 continue;
2884
2885 /* just ignore errors for now. */
2886 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2887 {
2888 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2889 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2890 }
2891 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2892 {
2893#ifdef VBOX_BUGREF_9217
2894 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2895#else
2896 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2897#endif
2898 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2899 }
2900 }
2901
2902 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2903 AssertRC(rc2);
2904 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2905 }
2906
2907 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2908 return rc;
2909}
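/* Illustrative sketch (added, not part of the original source): building the two CPU
 * sets for a broadcast, assuming the VMCPUSET_EMPTY / VMCPUSET_ADD helpers from
 * VBox/vmm/vmcpuset.h.  Halted VCPUs get their halt semaphore signalled, while VCPUs
 * executing guest code are poked out of it.
 * @code
 *  VMCPUSET SleepSet, PokeSet;
 *  VMCPUSET_EMPTY(&SleepSet);
 *  VMCPUSET_EMPTY(&PokeSet);
 *  VMCPUSET_ADD(&SleepSet, 1);     // VCPU 1 is halted, wake it up
 *  VMCPUSET_ADD(&PokeSet,  2);     // VCPU 2 is busy in guest context, poke it
 *  int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &SleepSet, &PokeSet);
 * @endcode
 */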
2910
2911
2912/**
2913 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2914 *
2915 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2916 * @param pGVM The global (ring-0) VM structure.
2917 * @param pVM The cross context VM structure.
2918 * @param pReq Pointer to the request packet.
2919 */
2920GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVMCC pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2921{
2922 /*
2923 * Validate input and pass it on.
2924 */
2925 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2926 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2927
2928 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2929}
2930
2931
2932
2933/**
2934 * Poll the scheduler to see if someone else should get a chance to run.
2935 *
2936 * This is a bit hackish and will not work too well if the machine is
2937 * under heavy load from non-VM processes.
2938 *
2939 * @returns VINF_SUCCESS if not yielded.
2940 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2941 * @param pGVM The global (ring-0) VM structure.
2942 * @param pVM The cross context VM structure.
2943 * @param idCpu The Virtual CPU ID of the calling EMT.
2944 * @param fYield Whether to yield or not.
2945 * This is for when we're spinning in the halt loop.
2946 * @thread EMT(idCpu).
2947 */
2948GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, bool fYield)
2949{
2950 /*
2951 * Validate input.
2952 */
2953 PGVMM pGVMM;
2954 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2955 if (RT_SUCCESS(rc))
2956 {
2957 /*
2958         * We currently only implement helping with wake-ups (fYield = false), so don't
2959 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2960 */
2961 if (!fYield && pGVMM->fDoEarlyWakeUps)
2962 {
2963 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2964 pGVM->gvmm.s.StatsSched.cPollCalls++;
2965
2966 Assert(ASMGetFlags() & X86_EFL_IF);
2967 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2968
2969 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2970
2971 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2972 }
2973 /*
2974 * Not quite sure what we could do here...
2975 */
2976 else if (fYield)
2977 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2978 else
2979 rc = VINF_SUCCESS;
2980 }
2981
2982    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2983 return rc;
2984}
2985
2986
2987#ifdef GVMM_SCHED_WITH_PPT
2988/**
2989 * Timer callback for the periodic preemption timer.
2990 *
2991 * @param pTimer The timer handle.
2992 * @param pvUser Pointer to the per cpu structure.
2993 * @param iTick The current tick.
2994 */
2995static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2996{
2997 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2998 NOREF(pTimer); NOREF(iTick);
2999
3000 /*
3001 * Termination check
3002 */
3003 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
3004 return;
3005
3006 /*
3007     * Do the housekeeping.
3008 */
3009 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3010
3011 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3012 {
3013 /*
3014 * Historicize the max frequency.
3015 */
3016 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3017 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3018 pCpu->Ppt.iTickHistorization = 0;
3019 pCpu->Ppt.uDesiredHz = 0;
3020
3021 /*
3022         * Check if the current timer frequency needs changing.
3023 */
3024 uint32_t uHistMaxHz = 0;
3025 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3026 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3027 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3028 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3029 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3030 else if (uHistMaxHz)
3031 {
3032 /*
3033 * Reprogram it.
3034 */
3035 pCpu->Ppt.cChanges++;
3036 pCpu->Ppt.iTickHistorization = 0;
3037 pCpu->Ppt.uTimerHz = uHistMaxHz;
3038 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3039 pCpu->Ppt.cNsInterval = cNsInterval;
3040 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3041 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3042 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3043 / cNsInterval;
3044 else
3045 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3046 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3047
3048 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3049 RTTimerChangeInterval(pTimer, cNsInterval);
3050 }
3051 else
3052 {
3053 /*
3054 * Stop it.
3055 */
3056 pCpu->Ppt.fStarted = false;
3057 pCpu->Ppt.uTimerHz = 0;
3058 pCpu->Ppt.cNsInterval = 0;
3059 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3060
3061 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3062 RTTimerStop(pTimer);
3063 }
3064 }
3065 else
3066 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3067}
3068#endif /* GVMM_SCHED_WITH_PPT */
3069
3070
3071/**
3072 * Updates the periodic preemption timer for the calling CPU.
3073 *
3074 * The caller must have disabled preemption!
3075 * The caller must check that the host can do high resolution timers.
3076 *
3077 * @param pVM The cross context VM structure.
3078 * @param idHostCpu The current host CPU id.
3079 * @param uHz The desired frequency.
3080 */
3081GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVMCC pVM, RTCPUID idHostCpu, uint32_t uHz)
3082{
3083 NOREF(pVM);
3084#ifdef GVMM_SCHED_WITH_PPT
3085 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3086 Assert(RTTimerCanDoHighResolution());
3087
3088 /*
3089 * Resolve the per CPU data.
3090 */
3091 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3092 PGVMM pGVMM = g_pGVMM;
3093 if ( !VALID_PTR(pGVMM)
3094 || pGVMM->u32Magic != GVMM_MAGIC)
3095 return;
3096 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3097 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3098 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3099 && pCpu->idCpu == idHostCpu,
3100                         ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3101
3102 /*
3103 * Check whether we need to do anything about the timer.
3104     * We have to be a little bit careful since we might be racing the timer
3105 * callback here.
3106 */
3107 if (uHz > 16384)
3108 uHz = 16384; /** @todo add a query method for this! */
3109 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3110 && uHz >= pCpu->Ppt.uMinHz
3111 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3112 {
3113 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3114
3115 pCpu->Ppt.uDesiredHz = uHz;
3116 uint32_t cNsInterval = 0;
3117 if (!pCpu->Ppt.fStarted)
3118 {
3119 pCpu->Ppt.cStarts++;
3120 pCpu->Ppt.fStarted = true;
3121 pCpu->Ppt.fStarting = true;
3122 pCpu->Ppt.iTickHistorization = 0;
3123 pCpu->Ppt.uTimerHz = uHz;
3124 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3125 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3126 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3127 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3128 / cNsInterval;
3129 else
3130 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3131 }
3132
3133 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3134
3135 if (cNsInterval)
3136 {
3137 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3138 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3139 AssertRC(rc);
3140
3141 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3142 if (RT_FAILURE(rc))
3143 pCpu->Ppt.fStarted = false;
3144 pCpu->Ppt.fStarting = false;
3145 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3146 }
3147 }
3148#else /* !GVMM_SCHED_WITH_PPT */
3149 NOREF(idHostCpu); NOREF(uHz);
3150#endif /* !GVMM_SCHED_WITH_PPT */
3151}
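/* Illustrative sketch (added, not part of the original source): how a caller might feed
 * a frequency hint to the periodic preemption timer.  The API requires preemption to be
 * disabled and should only be used when the host supports high resolution timers; the
 * helper name and the origin of uHzHint are hypothetical.
 * @code
 *  static void examplePptUpdate(PVMCC pVM, uint32_t uHzHint)
 *  {
 *      RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *      RTThreadPreemptDisable(&PreemptState);
 *      if (RTTimerCanDoHighResolution())
 *          GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHzHint);
 *      RTThreadPreemptRestore(&PreemptState);
 *  }
 * @endcode
 */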
3152
3153
3154/**
3155 * Retrieves the GVMM statistics visible to the caller.
3156 *
3157 * @returns VBox status code.
3158 *
3159 * @param pStats Where to put the statistics.
3160 * @param pSession The current session.
3161 * @param pGVM The GVM to obtain statistics for. Optional.
3162 * @param pVM The VM structure corresponding to @a pGVM.
3163 */
3164GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVMCC pVM)
3165{
3166 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3167
3168 /*
3169 * Validate input.
3170 */
3171 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3172 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3173 pStats->cVMs = 0; /* (crash before taking the sem...) */
3174
3175 /*
3176 * Take the lock and get the VM statistics.
3177 */
3178 PGVMM pGVMM;
3179 if (pGVM)
3180 {
3181 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3182 if (RT_FAILURE(rc))
3183 return rc;
3184 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3185 }
3186 else
3187 {
3188 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3189 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
3190
3191 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3192 AssertRCReturn(rc, rc);
3193 }
3194
3195 /*
3196 * Enumerate the VMs and add the ones visible to the statistics.
3197 */
3198 pStats->cVMs = 0;
3199 pStats->cEMTs = 0;
3200 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3201
3202 for (unsigned i = pGVMM->iUsedHead;
3203 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3204 i = pGVMM->aHandles[i].iNext)
3205 {
3206 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3207 void *pvObj = pGVMM->aHandles[i].pvObj;
3208 if ( VALID_PTR(pvObj)
3209 && VALID_PTR(pOtherGVM)
3210 && pOtherGVM->u32Magic == GVM_MAGIC
3211 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3212 {
3213 pStats->cVMs++;
3214 pStats->cEMTs += pOtherGVM->cCpus;
3215
3216 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3217 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3218 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3219 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3220 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3221
3222 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3223 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3224 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3225
3226 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3227 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3228
3229 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3230 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3231 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3232 }
3233 }
3234
3235 /*
3236 * Copy out the per host CPU statistics.
3237 */
3238 uint32_t iDstCpu = 0;
3239 uint32_t cSrcCpus = pGVMM->cHostCpus;
3240 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3241 {
3242 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3243 {
3244 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3245 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3246#ifdef GVMM_SCHED_WITH_PPT
3247 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3248 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3249 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3250 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3251#else
3252 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3253 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3254 pStats->aHostCpus[iDstCpu].cChanges = 0;
3255 pStats->aHostCpus[iDstCpu].cStarts = 0;
3256#endif
3257 iDstCpu++;
3258 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3259 break;
3260 }
3261 }
3262 pStats->cHostCpus = iDstCpu;
3263
3264 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3265
3266 return VINF_SUCCESS;
3267}
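/* Illustrative sketch (added, not part of the original source): fetching the aggregated
 * scheduler statistics directly in ring-0, given a valid pSession.  Real callers usually
 * go through the GVMMR0QueryStatisticsReq wrapper below with a request packet; note that
 * GVMMSTATS is a fairly large structure.
 * @code
 *  GVMMSTATS Stats;
 *  int rc = GVMMR0QueryStatistics(&Stats, pSession, NULL, NULL);  // pGVM/pVM omitted: all visible VMs
 *  if (RT_SUCCESS(rc))
 *  {
 *      // Stats.cVMs, Stats.cEMTs and Stats.SchedSum.* now describe all visible VMs.
 *  }
 * @endcode
 */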
3268
3269
3270/**
3271 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3272 *
3273 * @returns see GVMMR0QueryStatistics.
3274 * @param pGVM The global (ring-0) VM structure. Optional.
3275 * @param pVM The cross context VM structure. Optional.
3276 * @param pReq Pointer to the request packet.
3277 * @param pSession The current session.
3278 */
3279GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVMCC pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3280{
3281 /*
3282 * Validate input and pass it on.
3283 */
3284 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3285 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3286 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3287
3288 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
3289}
3290
3291
3292/**
3293 * Resets the specified GVMM statistics.
3294 *
3295 * @returns VBox status code.
3296 *
3297 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
3298 * @param pSession The current session.
3299 * @param pGVM The GVM to reset statistics for. Optional.
3300 * @param pVM The VM structure corresponding to @a pGVM.
3301 */
3302GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVMCC pVM)
3303{
3304 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3305
3306 /*
3307 * Validate input.
3308 */
3309 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3310 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3311
3312 /*
3313 * Take the lock and get the VM statistics.
3314 */
3315 PGVMM pGVMM;
3316 if (pGVM)
3317 {
3318 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3319 if (RT_FAILURE(rc))
3320 return rc;
3321# define MAYBE_RESET_FIELD(field) \
3322 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3323 MAYBE_RESET_FIELD(cHaltCalls);
3324 MAYBE_RESET_FIELD(cHaltBlocking);
3325 MAYBE_RESET_FIELD(cHaltTimeouts);
3326 MAYBE_RESET_FIELD(cHaltNotBlocking);
3327 MAYBE_RESET_FIELD(cHaltWakeUps);
3328 MAYBE_RESET_FIELD(cWakeUpCalls);
3329 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3330 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3331 MAYBE_RESET_FIELD(cPokeCalls);
3332 MAYBE_RESET_FIELD(cPokeNotBusy);
3333 MAYBE_RESET_FIELD(cPollCalls);
3334 MAYBE_RESET_FIELD(cPollHalts);
3335 MAYBE_RESET_FIELD(cPollWakeUps);
3336# undef MAYBE_RESET_FIELD
3337 }
3338 else
3339 {
3340 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3341
3342 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3343 AssertRCReturn(rc, rc);
3344 }
3345
3346 /*
3347 * Enumerate the VMs and add the ones visible to the statistics.
3348 */
3349 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3350 {
3351 for (unsigned i = pGVMM->iUsedHead;
3352 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3353 i = pGVMM->aHandles[i].iNext)
3354 {
3355 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3356 void *pvObj = pGVMM->aHandles[i].pvObj;
3357 if ( VALID_PTR(pvObj)
3358 && VALID_PTR(pOtherGVM)
3359 && pOtherGVM->u32Magic == GVM_MAGIC
3360 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3361 {
3362# define MAYBE_RESET_FIELD(field) \
3363 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3364 MAYBE_RESET_FIELD(cHaltCalls);
3365 MAYBE_RESET_FIELD(cHaltBlocking);
3366 MAYBE_RESET_FIELD(cHaltTimeouts);
3367 MAYBE_RESET_FIELD(cHaltNotBlocking);
3368 MAYBE_RESET_FIELD(cHaltWakeUps);
3369 MAYBE_RESET_FIELD(cWakeUpCalls);
3370 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3371 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3372 MAYBE_RESET_FIELD(cPokeCalls);
3373 MAYBE_RESET_FIELD(cPokeNotBusy);
3374 MAYBE_RESET_FIELD(cPollCalls);
3375 MAYBE_RESET_FIELD(cPollHalts);
3376 MAYBE_RESET_FIELD(cPollWakeUps);
3377# undef MAYBE_RESET_FIELD
3378 }
3379 }
3380 }
3381
3382 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3383
3384 return VINF_SUCCESS;
3385}
3386
3387
3388/**
3389 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3390 *
3391 * @returns see GVMMR0ResetStatistics.
3392 * @param pGVM The global (ring-0) VM structure. Optional.
3393 * @param pVM The cross context VM structure. Optional.
3394 * @param pReq Pointer to the request packet.
3395 * @param pSession The current session.
3396 */
3397GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVMCC pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3398{
3399 /*
3400 * Validate input and pass it on.
3401 */
3402 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3403 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3404 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3405
3406 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3407}
3408