VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@80191

Last change on this file since 80191 was 80191, checked in by vboxsync, 6 years ago

VMM/r3: Refactored VMCPU enumeration in preparation that aCpus will be replaced with a pointer array. Removed two raw-mode offset members from the CPUM and CPUMCPU sub-structures. bugref:9217 bugref:9517

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 121.1 KB
1/* $Id: GVMMR0.cpp 80191 2019-08-08 00:36:57Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
91#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
92/** Define this to enable the periodic preemption timer. */
93# define GVMM_SCHED_WITH_PPT
94#endif
95
96
97/** @def GVMM_CHECK_SMAP_SETUP
98 * SMAP check setup. */
99/** @def GVMM_CHECK_SMAP_CHECK
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
101 * it will be logged and @a a_BadExpr is executed. */
102/** @def GVMM_CHECK_SMAP_CHECK2
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
104 * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
105 * executed. */
106#if defined(VBOX_STRICT) || 1
107# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
108# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
109 do { \
110 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
111 { \
112 RTCCUINTREG fEflCheck = ASMGetFlags(); \
113 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
114 { /* likely */ } \
115 else \
116 { \
117 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
118 a_BadExpr; \
119 } \
120 } \
121 } while (0)
122# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
123 do { \
124 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
125 { \
126 RTCCUINTREG fEflCheck = ASMGetFlags(); \
127 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
128 { /* likely */ } \
129 else \
130 { \
131 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
132 a_BadExpr; \
133 } \
134 } \
135 } while (0)
136#else
137# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
138# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
139# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
140#endif
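
/*
 * A minimal usage sketch for the SMAP checking macros above.  The function
 * name gvmmR0SmapUsageSketch is made up for illustration; the pattern
 * (set up once per function, check wherever EFLAGS.AC may have been lost)
 * is the one the ring-0 entry points in this file typically follow, with
 * RT_NOTHING as the bad-expression argument.
 */
static int gvmmR0SmapUsageSketch(PVM pVM)
{
    GVMM_CHECK_SMAP_SETUP();                    /* captures SUPR0GetKernelFeatures() once */
    GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);    /* logs + records in the VM if EFLAGS.AC is clear */
    /* ... ring-0 work that might clobber EFLAGS.AC ... */
    GVMM_CHECK_SMAP_CHECK(RT_NOTHING);          /* variant without VM context, log only */
    return VINF_SUCCESS;
}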
141
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting the timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (modulo the array size). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240/** The interval each history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
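
/*
 * Illustration of the history window and frequency pick described in the
 * file header: 8 aHzHistory entries of GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 * (20 ms) each give the ~160 ms window mentioned there.  The helper below is
 * a sketch only; gvmmR0PptSketchPickHz is not part of the GVMM, the real work
 * is done by gvmmR0SchedPeriodicPreemptionTimerCallback and
 * GVMMR0SchedUpdatePeriodicPreemptionTimer.
 */
#ifdef GVMM_SCHED_WITH_PPT
static uint32_t gvmmR0PptSketchPickHz(PGVMMHOSTCPU pCpu)
{
    /* Start with the latest, not yet historicized, request... */
    uint32_t uHz = ASMAtomicUoReadU32(&pCpu->Ppt.uDesiredHz);

    /* ...and take the max over the ~160 ms history. */
    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        if (pCpu->Ppt.aHzHistory[i] > uHz)
            uHz = pCpu->Ppt.aHzHistory[i];

    /* Never drop below the static minimum for this host CPU. */
    return RT_MAX(uHz, pCpu->Ppt.uMinHz);
}
#endif /* GVMM_SCHED_WITH_PPT */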
243
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * aren't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nanoseconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nanoseconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nanoseconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nanoseconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362#ifdef VBOX_BUGREF_9217
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364#else
365static void gvmmR0InitPerVMData(PGVM pGVM);
366#endif
367static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
368static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
369static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
370
371#ifdef GVMM_SCHED_WITH_PPT
372static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
373#endif
374
375
376/**
377 * Initializes the GVMM.
378 *
379 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
380 *
381 * @returns VBox status code.
382 */
383GVMMR0DECL(int) GVMMR0Init(void)
384{
385 LogFlow(("GVMMR0Init:\n"));
386
387 /*
388 * Allocate and initialize the instance data.
389 */
390 uint32_t cHostCpus = RTMpGetArraySize();
391 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
392
393 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
394 if (!pGVMM)
395 return VERR_NO_MEMORY;
396 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
397 "GVMM-CreateDestroyLock");
398 if (RT_SUCCESS(rc))
399 {
400 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
401 if (RT_SUCCESS(rc))
402 {
403 pGVMM->u32Magic = GVMM_MAGIC;
404 pGVMM->iUsedHead = 0;
405 pGVMM->iFreeHead = 1;
406
407 /* the nil handle */
408 pGVMM->aHandles[0].iSelf = 0;
409 pGVMM->aHandles[0].iNext = 0;
410
411 /* the tail */
412 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
413 pGVMM->aHandles[i].iSelf = i;
414 pGVMM->aHandles[i].iNext = 0; /* nil */
415
416 /* the rest */
417 while (i-- > 1)
418 {
419 pGVMM->aHandles[i].iSelf = i;
420 pGVMM->aHandles[i].iNext = i + 1;
421 }
422
423 /* The default configuration values. */
424 uint32_t cNsResolution = RTSemEventMultiGetResolution();
425 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
426 if (cNsResolution >= 5*RT_NS_100US)
427 {
428 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
429 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
430 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
431 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
432 }
433 else if (cNsResolution > RT_NS_100US)
434 {
435 pGVMM->nsMinSleepAlone = cNsResolution / 2;
436 pGVMM->nsMinSleepCompany = cNsResolution / 4;
437 pGVMM->nsEarlyWakeUp1 = 0;
438 pGVMM->nsEarlyWakeUp2 = 0;
439 }
440 else
441 {
442 pGVMM->nsMinSleepAlone = 2000;
443 pGVMM->nsMinSleepCompany = 2000;
444 pGVMM->nsEarlyWakeUp1 = 0;
445 pGVMM->nsEarlyWakeUp2 = 0;
446 }
447 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
448
449 /* The host CPU data. */
450 pGVMM->cHostCpus = cHostCpus;
451 uint32_t iCpu = cHostCpus;
452 RTCPUSET PossibleSet;
453 RTMpGetSet(&PossibleSet);
454 while (iCpu-- > 0)
455 {
456 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
457#ifdef GVMM_SCHED_WITH_PPT
458 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
459 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
460 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
461 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
462 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
464 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
467 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
468 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
469 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
470#endif
471
472 if (RTCpuSetIsMember(&PossibleSet, iCpu))
473 {
474 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
475 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
476
477#ifdef GVMM_SCHED_WITH_PPT
478 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
479 50*1000*1000 /* whatever */,
480 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
481 gvmmR0SchedPeriodicPreemptionTimerCallback,
482 &pGVMM->aHostCpus[iCpu]);
483 if (RT_SUCCESS(rc))
484 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
485 if (RT_FAILURE(rc))
486 {
487 while (iCpu < cHostCpus)
488 {
489 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
490 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
491 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
492 iCpu++;
493 }
494 break;
495 }
496#endif
497 }
498 else
499 {
500 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
501 pGVMM->aHostCpus[iCpu].u32Magic = 0;
502 }
503 }
504 if (RT_SUCCESS(rc))
505 {
506 g_pGVMM = pGVMM;
507 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
508 return VINF_SUCCESS;
509 }
510
511 /* bail out. */
512 RTCritSectRwDelete(&pGVMM->UsedLock);
513 }
514 RTCritSectDelete(&pGVMM->CreateDestroyLock);
515 }
516
517 RTMemFree(pGVMM);
518 return rc;
519}
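
/*
 * Sketch of how the handle free list set up by GVMMR0Init above is consumed:
 * entry 0 is the nil handle, iFreeHead starts at 1, and allocation pops the
 * head of the free chain and pushes it onto the used chain.  This mirrors
 * what GVMMR0CreateVM does further down; gvmmR0SketchAllocHandle is a made-up
 * name for illustration and assumes the caller holds the create/destroy lock
 * and has taken the used list lock exclusively.
 */
static PGVMHANDLE gvmmR0SketchAllocHandle(PGVMM pGVMM)
{
    uint16_t const iHandle = pGVMM->iFreeHead;
    if (!iHandle)                               /* 0 is nil, i.e. the free list is empty. */
        return NULL;

    PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
    pGVMM->iFreeHead = pHandle->iNext;          /* unlink from the free chain */
    pHandle->iNext   = pGVMM->iUsedHead;        /* push onto the used chain */
    pGVMM->iUsedHead = iHandle;
    pGVMM->cVMs++;
    return pHandle;
}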
520
521
522/**
523 * Terminates the GVMM.
524 *
525 * This is called while owning the loader semaphore (see supdrvLdrFree()).
526 * And unless something is wrong, there should be absolutely no VMs
527 * registered at this point.
528 */
529GVMMR0DECL(void) GVMMR0Term(void)
530{
531 LogFlow(("GVMMR0Term:\n"));
532
533 PGVMM pGVMM = g_pGVMM;
534 g_pGVMM = NULL;
535 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
536 {
537 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
538 return;
539 }
540
541 /*
542 * First of all, stop all active timers.
543 */
544 uint32_t cActiveTimers = 0;
545 uint32_t iCpu = pGVMM->cHostCpus;
546 while (iCpu-- > 0)
547 {
548 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
549#ifdef GVMM_SCHED_WITH_PPT
550 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
551 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
552 cActiveTimers++;
553#endif
554 }
555 if (cActiveTimers)
556 RTThreadSleep(1); /* fudge */
557
558 /*
559 * Invalidate the instance data and free resources.
560 */
561 pGVMM->u32Magic = ~GVMM_MAGIC;
562 RTCritSectRwDelete(&pGVMM->UsedLock);
563 RTCritSectDelete(&pGVMM->CreateDestroyLock);
564
565 pGVMM->iFreeHead = 0;
566 if (pGVMM->iUsedHead)
567 {
568 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
569 pGVMM->iUsedHead = 0;
570 }
571
572#ifdef GVMM_SCHED_WITH_PPT
573 iCpu = pGVMM->cHostCpus;
574 while (iCpu-- > 0)
575 {
576 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
577 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
578 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
579 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
580 }
581#endif
582
583 RTMemFree(pGVMM);
584}
585
586
587/**
588 * A quick hack for setting global config values.
589 *
590 * @returns VBox status code.
591 *
592 * @param pSession The session handle. Used for authentication.
593 * @param pszName The variable name.
594 * @param u64Value The new value.
595 */
596GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
597{
598 /*
599 * Validate input.
600 */
601 PGVMM pGVMM;
602 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
603 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
604 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
605
606 /*
607 * String switch time!
608 */
609 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
610 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
611 int rc = VINF_SUCCESS;
612 pszName += sizeof("/GVMM/") - 1;
613 if (!strcmp(pszName, "cEMTsMeansCompany"))
614 {
615 if (u64Value <= UINT32_MAX)
616 pGVMM->cEMTsMeansCompany = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepAlone"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepAlone = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627 else if (!strcmp(pszName, "MinSleepCompany"))
628 {
629 if (u64Value <= RT_NS_100MS)
630 pGVMM->nsMinSleepCompany = u64Value;
631 else
632 rc = VERR_OUT_OF_RANGE;
633 }
634 else if (!strcmp(pszName, "EarlyWakeUp1"))
635 {
636 if (u64Value <= RT_NS_100MS)
637 {
638 pGVMM->nsEarlyWakeUp1 = u64Value;
639 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
640 }
641 else
642 rc = VERR_OUT_OF_RANGE;
643 }
644 else if (!strcmp(pszName, "EarlyWakeUp2"))
645 {
646 if (u64Value <= RT_NS_100MS)
647 {
648 pGVMM->nsEarlyWakeUp2 = u64Value;
649 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
650 }
651 else
652 rc = VERR_OUT_OF_RANGE;
653 }
654 else
655 rc = VERR_CFGM_VALUE_NOT_FOUND;
656 return rc;
657}
658
659
660/**
661 * A quick hack for getting global config values.
662 *
663 * @returns VBox status code.
664 *
665 * @param pSession The session handle. Used for authentication.
666 * @param pszName The variable name.
667 * @param pu64Value Where to return the value.
668 */
669GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
670{
671 /*
672 * Validate input.
673 */
674 PGVMM pGVMM;
675 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
676 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
677 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
678 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
679
680 /*
681 * String switch time!
682 */
683 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
684 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
685 int rc = VINF_SUCCESS;
686 pszName += sizeof("/GVMM/") - 1;
687 if (!strcmp(pszName, "cEMTsMeansCompany"))
688 *pu64Value = pGVMM->cEMTsMeansCompany;
689 else if (!strcmp(pszName, "MinSleepAlone"))
690 *pu64Value = pGVMM->nsMinSleepAlone;
691 else if (!strcmp(pszName, "MinSleepCompany"))
692 *pu64Value = pGVMM->nsMinSleepCompany;
693 else if (!strcmp(pszName, "EarlyWakeUp1"))
694 *pu64Value = pGVMM->nsEarlyWakeUp1;
695 else if (!strcmp(pszName, "EarlyWakeUp2"))
696 *pu64Value = pGVMM->nsEarlyWakeUp2;
697 else
698 rc = VERR_CFGM_VALUE_NOT_FOUND;
699 return rc;
700}
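
/*
 * Usage sketch for the two config accessors above.  gvmmR0SketchTuneScheduler
 * is a made-up name for illustration; the session pointer is assumed to come
 * from the caller.  Names must carry the "/GVMM/" prefix and values are range
 * checked (the sleep/wake-up values against RT_NS_100MS).
 */
static int gvmmR0SketchTuneScheduler(PSUPDRVSESSION pSession)
{
    int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns = 0.5 ms */);
    if (RT_SUCCESS(rc))
    {
        uint64_t u64Value = 0;
        rc = GVMMR0QueryConfig(pSession, "/GVMM/MinSleepAlone", &u64Value);
        Assert(RT_FAILURE(rc) || u64Value == 500000);
    }
    return rc;
}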
701
702
703/**
704 * Acquire the 'used' lock in shared mode.
705 *
706 * This prevents destruction of the VM while we're in ring-0.
707 *
708 * @returns IPRT status code, see RTCritSectRwEnterShared.
709 * @param a_pGVMM The GVMM instance data.
710 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
711 */
712#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
713
714/**
715 * Release the 'used' lock when owning it in shared mode.
716 *
717 * @returns IPRT status code, see RTCritSectRwLeaveShared.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_LOCK
720 */
721#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Acquire the 'used' lock in exclusive mode.
725 *
726 * Only use this function when making changes to the used list.
727 *
728 * @returns IPRT status code, see RTCritSectRwEnterExcl.
729 * @param a_pGVMM The GVMM instance data.
730 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
731 */
732#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
733
734/**
735 * Release the 'used' lock when owning it in exclusive mode.
736 *
737 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
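
/*
 * Sketch of the intended pattern for the 'used' lock macros above: take the
 * lock shared while walking the handle list (the way the scheduler examines
 * running VMs) and exclusively only when the list itself is modified.
 * gvmmR0SketchWalkUsedList is a made-up illustration; the loop guard against
 * a corrupted chain follows the same idea as gvmmR0HandleObjDestructor below.
 */
static uint32_t gvmmR0SketchWalkUsedList(PGVMM pGVMM)
{
    uint32_t cVMs = 0;
    int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
    AssertRC(rc);

    unsigned cGuard = RT_ELEMENTS(pGVMM->aHandles) + 2;
    for (uint16_t i = pGVMM->iUsedHead;
         i != 0 && i < RT_ELEMENTS(pGVMM->aHandles) && cGuard-- > 0;
         i = pGVMM->aHandles[i].iNext)
        cVMs++;

    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
    return cVMs;
}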
742
743
744/**
745 * Try acquire the 'create & destroy' lock.
746 *
747 * @returns IPRT status code, see RTCritSectEnter.
748 * @param pGVMM The GVMM instance data.
749 */
750DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
751{
752 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
753 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
754 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
755 return rc;
756}
757
758
759/**
760 * Release the 'create & destroy' lock.
761 *
762 * @returns IPRT status code, see RTCritSectLeave.
763 * @param pGVMM The GVMM instance data.
764 */
765DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
766{
767 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
768 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
769 AssertRC(rc);
770 return rc;
771}
772
773
774/**
775 * Request wrapper for the GVMMR0CreateVM API.
776 *
777 * @returns VBox status code.
778 * @param pReq The request buffer.
779 * @param pSession The session handle. The VM will be associated with this.
780 */
781GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
782{
783 /*
784 * Validate the request.
785 */
786 if (!VALID_PTR(pReq))
787 return VERR_INVALID_POINTER;
788 if (pReq->Hdr.cbReq != sizeof(*pReq))
789 return VERR_INVALID_PARAMETER;
790 if (pReq->pSession != pSession)
791 return VERR_INVALID_POINTER;
792
793 /*
794 * Execute it.
795 */
796 PVM pVM;
797 pReq->pVMR0 = NULL;
798 pReq->pVMR3 = NIL_RTR3PTR;
799 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
800 if (RT_SUCCESS(rc))
801 {
802 pReq->pVMR0 = pVM;
803 pReq->pVMR3 = pVM->pVMR3;
804 }
805 return rc;
806}
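
/*
 * Sketch of how a caller prepares the request block consumed by
 * GVMMR0CreateVMReq above.  gvmmR0SketchInitCreateVMReq is a made-up name for
 * illustration and the SUPVMMR0REQHDR_MAGIC header magic is an assumption
 * about the usual SUPVMMR0REQHDR convention; the remaining assignments follow
 * directly from the validation and output handling in the function above.
 * The filled request is what ring-3 passes down with the
 * VMMR0_DO_GVMM_CREATE_VM operation.
 */
static void gvmmR0SketchInitCreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession, uint32_t cCpus)
{
    pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;  /* assumed header magic */
    pReq->Hdr.cbReq    = sizeof(*pReq);         /* must match exactly, see the size check above */
    pReq->pSession     = pSession;              /* must be the session the call is made on */
    pReq->cCpus        = cCpus;                 /* 1..VMM_MAX_CPU_COUNT, checked by GVMMR0CreateVM */
    pReq->pVMR0        = NULL;                  /* output: ring-0 VM pointer */
    pReq->pVMR3        = NIL_RTR3PTR;           /* output: ring-3 mapping of the VM */
}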
807
808
809/**
810 * Allocates the VM structure and registers it with GVM.
811 *
812 * The caller will become the VM owner and thereby the EMT.
813 *
814 * @returns VBox status code.
815 * @param pSession The support driver session.
816 * @param cCpus Number of virtual CPUs for the new VM.
817 * @param ppVM Where to store the pointer to the VM structure.
818 *
819 * @thread EMT.
820 */
821GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
822{
823 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
824 PGVMM pGVMM;
825 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
826
827 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
828 *ppVM = NULL;
829
830 if ( cCpus == 0
831 || cCpus > VMM_MAX_CPU_COUNT)
832 return VERR_INVALID_PARAMETER;
833
834 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
835 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
836 RTPROCESS ProcId = RTProcSelf();
837 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
838
839 /*
840 * The whole allocation process is protected by the lock.
841 */
842 int rc = gvmmR0CreateDestroyLock(pGVMM);
843 AssertRCReturn(rc, rc);
844
845 /*
846 * Only one VM per session.
847 */
848 if (SUPR0GetSessionVM(pSession) != NULL)
849 {
850 gvmmR0CreateDestroyUnlock(pGVMM);
851 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
852 return VERR_ALREADY_EXISTS;
853 }
854
855 /*
856 * Allocate a handle first so we don't waste resources unnecessarily.
857 */
858 uint16_t iHandle = pGVMM->iFreeHead;
859 if (iHandle)
860 {
861 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
862
863 /* consistency checks, a bit paranoid as always. */
864 if ( !pHandle->pVM
865 && !pHandle->pGVM
866 && !pHandle->pvObj
867 && pHandle->iSelf == iHandle)
868 {
869 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
870 if (pHandle->pvObj)
871 {
872 /*
873 * Move the handle from the free to used list and perform permission checks.
874 */
875 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
876 AssertRC(rc);
877
878 pGVMM->iFreeHead = pHandle->iNext;
879 pHandle->iNext = pGVMM->iUsedHead;
880 pGVMM->iUsedHead = iHandle;
881 pGVMM->cVMs++;
882
883 pHandle->pVM = NULL;
884 pHandle->pGVM = NULL;
885 pHandle->pSession = pSession;
886 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
887 pHandle->ProcId = NIL_RTPROCESS;
888
889 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
890
891 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
892 if (RT_SUCCESS(rc))
893 {
894#ifdef VBOX_BUGREF_9217
895 /*
896 * Allocate memory for the VM structure (combined VM + GVM).
897 */
898 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
899 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
900 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
901 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
902 if (RT_SUCCESS(rc))
903 {
904 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
905 AssertPtr(pGVM);
906
907 /*
908 * Initialise the structure.
909 */
910 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
911 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
912 GMMR0InitPerVMData(pGVM);
913 pGVM->gvmm.s.VMMemObj = hVMMemObj;
914
915 /*
916 * Allocate page array.
917 * This currently has to be made available to ring-3, but this should change eventually.
918 */
919 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
920 if (RT_SUCCESS(rc))
921 {
922 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
923 for (uint32_t iPage = 0; iPage < cPages; iPage++)
924 {
925 paPages[iPage].uReserved = 0;
926 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
927 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
928 }
929
930 /*
931 * Map the page array, VM and VMCPU structures into ring-3.
932 */
933 AssertCompileSizeAlignment(VM, PAGE_SIZE);
934 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
935 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
936 0 /*offSub*/, sizeof(VM));
937 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
938 {
939 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
940 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
941 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
942 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
943 }
944 if (RT_SUCCESS(rc))
945 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
946 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
947 NIL_RTR0PROCESS);
948 if (RT_SUCCESS(rc))
949 {
950 /*
951 * Initialize all the VM pointers.
952 */
953 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
954 AssertPtr((void *)pVMR3);
955
956 for (VMCPUID i = 0; i < cCpus; i++)
957 {
958 pGVM->aCpus[i].pVMR0 = pGVM;
959 pGVM->aCpus[i].pVMR3 = pVMR3;
960 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
961 AssertPtr((void *)pGVM->apCpusR3[i]);
962 }
963
964 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
965 AssertPtr((void *)pGVM->paVMPagesR3);
966
967 /*
968 * Complete the handle - take the UsedLock sem just to be careful.
969 */
970 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
971 AssertRC(rc);
972
973 pHandle->pVM = pGVM;
974 pHandle->pGVM = pGVM;
975 pHandle->hEMT0 = hEMT0;
976 pHandle->ProcId = ProcId;
977 pGVM->pVMR3 = pVMR3;
978 pGVM->aCpus[0].hEMT = hEMT0;
979 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
980 pGVMM->cEMTs += cCpus;
981
982 /* Associate it with the session and create the context hook for EMT0. */
983 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
984 if (RT_SUCCESS(rc))
985 {
986 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
987 if (RT_SUCCESS(rc))
988 {
989 /*
990 * Done!
991 */
992 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
993
994 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
995 gvmmR0CreateDestroyUnlock(pGVMM);
996
997 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
998
999 *ppVM = pGVM;
1000 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1001 return VINF_SUCCESS;
1002 }
1003
1004 SUPR0SetSessionVM(pSession, NULL, NULL);
1005 }
1006 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1007 }
1008
1009 /* Cleanup mappings. */
1010 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1011 {
1012 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1013 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1014 }
1015 for (VMCPUID i = 0; i < cCpus; i++)
1016 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1017 {
1018 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1019 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1020 }
1021 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1022 {
1023 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1024 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1025 }
1026 }
1027 }
1028
1029#else
1030 /*
1031 * Allocate the global VM structure (GVM) and initialize it.
1032 */
1033 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
1034 if (pGVM)
1035 {
1036 pGVM->u32Magic = GVM_MAGIC;
1037 pGVM->hSelf = iHandle;
1038 pGVM->pVM = NULL;
1039 pGVM->cCpus = cCpus;
1040 pGVM->pSession = pSession;
1041
1042 gvmmR0InitPerVMData(pGVM);
1043 GMMR0InitPerVMData(pGVM);
1044
1045 /*
1046 * Allocate the shared VM structure and associated page array.
1047 */
1048 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
1049 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
1050 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
1051 if (RT_SUCCESS(rc))
1052 {
1053 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
1054 memset(pVM, 0, cPages << PAGE_SHIFT);
1055 pVM->enmVMState = VMSTATE_CREATING;
1056 pVM->pVMR0 = pVM;
1057 pVM->pSession = pSession;
1058 pVM->hSelf = iHandle;
1059 pVM->cbSelf = cbVM;
1060 pVM->cCpus = cCpus;
1061 pVM->uCpuExecutionCap = 100; /* default is no cap. */
1062 AssertCompileMemberAlignment(VM, cpum, 64);
1063 AssertCompileMemberAlignment(VM, tm, 64);
1064 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
1065
1066 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
1067 if (RT_SUCCESS(rc))
1068 {
1069 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
1070 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1071 {
1072 paPages[iPage].uReserved = 0;
1073 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
1074 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
1075 }
1076
1077 /*
1078 * Map them into ring-3.
1079 */
1080 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
1081 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
1082 if (RT_SUCCESS(rc))
1083 {
1084 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1085 pVM->pVMR3 = pVMR3;
1086 AssertPtr((void *)pVMR3);
1087
1088 /* Initialize all the VM pointers. */
1089 for (VMCPUID i = 0; i < cCpus; i++)
1090 {
1091 pVM->aCpus[i].idCpu = i;
1092 pVM->aCpus[i].pVMR0 = pVM;
1093 pVM->aCpus[i].pVMR3 = pVMR3;
1094 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1095 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1096 pVM->apCpusR3[i] = pVMR3 + RT_UOFFSETOF_DYN(VM, aCpus[i]);
1097 }
1098
1099 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
1100 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
1101 NIL_RTR0PROCESS);
1102 if (RT_SUCCESS(rc))
1103 {
1104 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1105 AssertPtr((void *)pVM->paVMPagesR3);
1106
1107 /* complete the handle - take the UsedLock sem just to be careful. */
1108 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1109 AssertRC(rc);
1110
1111 pHandle->pVM = pVM;
1112 pHandle->pGVM = pGVM;
1113 pHandle->hEMT0 = hEMT0;
1114 pHandle->ProcId = ProcId;
1115 pGVM->pVM = pVM;
1116 pGVM->pVMR3 = pVMR3;
1117 pGVM->aCpus[0].hEMT = hEMT0;
1118 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
1119 pGVMM->cEMTs += cCpus;
1120
1121 for (VMCPUID i = 0; i < cCpus; i++)
1122 {
1123 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
1124 pGVM->aCpus[i].pVM = pVM;
1125 }
1126
1127 /* Associate it with the session and create the context hook for EMT0. */
1128 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
1129 if (RT_SUCCESS(rc))
1130 {
1131 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
1132 if (RT_SUCCESS(rc))
1133 {
1134 /*
1135 * Done!
1136 */
1137 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
1138
1139 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1140 gvmmR0CreateDestroyUnlock(pGVMM);
1141
1142 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1143
1144 *ppVM = pVM;
1145 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1146 return VINF_SUCCESS;
1147 }
1148
1149 SUPR0SetSessionVM(pSession, NULL, NULL);
1150 }
1151 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1152 }
1153
1154 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1155 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1156 }
1157 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1158 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1159 }
1160 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1161 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1162 }
1163 }
1164#endif
1165 }
1166 /* else: The user wasn't permitted to create this VM. */
1167
1168 /*
1169 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1170 * object reference here. A little extra mess because of the non-recursive lock.
1171 */
1172 void *pvObj = pHandle->pvObj;
1173 pHandle->pvObj = NULL;
1174 gvmmR0CreateDestroyUnlock(pGVMM);
1175
1176 SUPR0ObjRelease(pvObj, pSession);
1177
1178 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1179 return rc;
1180 }
1181
1182 rc = VERR_NO_MEMORY;
1183 }
1184 else
1185 rc = VERR_GVMM_IPE_1;
1186 }
1187 else
1188 rc = VERR_GVM_TOO_MANY_VMS;
1189
1190 gvmmR0CreateDestroyUnlock(pGVMM);
1191 return rc;
1192}
1193
1194
1195#ifdef VBOX_BUGREF_9217
1196/**
1197 * Initializes the per VM data belonging to GVMM.
1198 *
1199 * @param pGVM Pointer to the global VM structure.
1200 */
1201static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1202#else
1203/**
1204 * Initializes the per VM data belonging to GVMM.
1205 *
1206 * @param pGVM Pointer to the global VM structure.
1207 */
1208static void gvmmR0InitPerVMData(PGVM pGVM)
1209#endif
1210{
1211 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1212 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1213#ifdef VBOX_BUGREF_9217
1214 AssertCompileMemberAlignment(VM, cpum, 64);
1215 AssertCompileMemberAlignment(VM, tm, 64);
1216
1217 /* GVM: */
1218 pGVM->u32Magic = GVM_MAGIC;
1219 pGVM->hSelfSafe = hSelf;
1220 pGVM->cCpusSafe = cCpus;
1221 pGVM->pSessionSafe = pSession;
1222
1223 /* VM: */
1224 pGVM->enmVMState = VMSTATE_CREATING;
1225 pGVM->pVMR0 = pGVM;
1226 pGVM->pSession = pSession;
1227 pGVM->hSelf = hSelf;
1228 pGVM->cCpus = cCpus;
1229 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1230 pGVM->uStructVersion = 1;
1231 pGVM->cbSelf = sizeof(VM);
1232 pGVM->cbVCpu = sizeof(VMCPU);
1233#endif
1234
1235 /* GVMM: */
1236 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1237 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1238 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1239 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1240 pGVM->gvmm.s.fDoneVMMR0Init = false;
1241 pGVM->gvmm.s.fDoneVMMR0Term = false;
1242
1243 /*
1244 * Per virtual CPU.
1245 */
1246 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1247 {
1248 pGVM->aCpus[i].idCpu = i;
1249#ifdef VBOX_BUGREF_9217
1250 pGVM->aCpus[i].idCpuSafe = i;
1251#endif
1252 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1253#ifdef VBOX_BUGREF_9217
1254 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1255#endif
1256 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1257 pGVM->aCpus[i].pGVM = pGVM;
1258#ifndef VBOX_BUGREF_9217
1259 pGVM->aCpus[i].pVCpu = NULL;
1260 pGVM->aCpus[i].pVM = NULL;
1261#endif
1262#ifdef VBOX_BUGREF_9217
1263 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1264 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1265 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1266 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1267 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1268#endif
1269 }
1270}
1271
1272
1273/**
1274 * Does the VM initialization.
1275 *
1276 * @returns VBox status code.
1277 * @param pGVM The global (ring-0) VM structure.
1278 */
1279GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1280{
1281 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1282
1283 int rc = VERR_INTERNAL_ERROR_3;
1284 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1285 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1286 {
1287 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1288 {
1289 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1290 if (RT_FAILURE(rc))
1291 {
1292 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1293 break;
1294 }
1295 }
1296 }
1297 else
1298 rc = VERR_WRONG_ORDER;
1299
1300 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1301 return rc;
1302}
1303
1304
1305/**
1306 * Indicates that we're done with the ring-0 initialization
1307 * of the VM.
1308 *
1309 * @param pGVM The global (ring-0) VM structure.
1310 * @thread EMT(0)
1311 */
1312GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1313{
1314 /* Set the indicator. */
1315 pGVM->gvmm.s.fDoneVMMR0Init = true;
1316}
1317
1318
1319/**
1320 * Indicates that we're doing the ring-0 termination of the VM.
1321 *
1322 * @returns true if termination hasn't been done already, false if it has.
1323 * @param pGVM Pointer to the global VM structure. Optional.
1324 * @thread EMT(0) or session cleanup thread.
1325 */
1326GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1327{
1328 /* Validate the VM structure, state and handle. */
1329 AssertPtrReturn(pGVM, false);
1330
1331 /* Set the indicator. */
1332 if (pGVM->gvmm.s.fDoneVMMR0Term)
1333 return false;
1334 pGVM->gvmm.s.fDoneVMMR0Term = true;
1335 return true;
1336}
1337
1338
1339/**
1340 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1341 *
1342 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1343 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1344 * would've been nice if the caller were actually the EMT thread or if we somehow
1345 * could've associated the calling thread with the VM up front.
1346 *
1347 * @returns VBox status code.
1348 * @param pGVM The global (ring-0) VM structure.
1349 * @param pVM The cross context VM structure.
1350 *
1351 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1352 */
1353GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1354{
1355 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1356 PGVMM pGVMM;
1357 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1358
1359 /*
1360 * Validate the VM structure, state and caller.
1361 */
1362 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1363 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1364 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1365#ifdef VBOX_BUGREF_9217
1366 AssertReturn(pGVM == pVM, VERR_INVALID_POINTER);
1367#else
1368 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1369#endif
1370 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1371 VERR_WRONG_ORDER);
1372
1373 uint32_t hGVM = pGVM->hSelf;
1374 ASMCompilerBarrier();
1375 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1376 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1377
1378 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1379 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1380
1381 RTPROCESS ProcId = RTProcSelf();
1382 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1383 AssertReturn( ( pHandle->hEMT0 == hSelf
1384 && pHandle->ProcId == ProcId)
1385 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1386
1387 /*
1388 * Lookup the handle and destroy the object.
1389 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1390 * object, we take some precautions against racing callers just in case...
1391 */
1392 int rc = gvmmR0CreateDestroyLock(pGVMM);
1393 AssertRC(rc);
1394
1395 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1396 if ( pHandle->pVM == pVM
1397 && ( ( pHandle->hEMT0 == hSelf
1398 && pHandle->ProcId == ProcId)
1399 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1400 && VALID_PTR(pHandle->pvObj)
1401 && VALID_PTR(pHandle->pSession)
1402 && VALID_PTR(pHandle->pGVM)
1403 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1404 {
1405 /* Check that other EMTs have deregistered. */
1406 uint32_t cNotDeregistered = 0;
1407 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1408 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1409 if (cNotDeregistered == 0)
1410 {
1411 /* Grab the object pointer. */
1412 void *pvObj = pHandle->pvObj;
1413 pHandle->pvObj = NULL;
1414 gvmmR0CreateDestroyUnlock(pGVMM);
1415
1416 SUPR0ObjRelease(pvObj, pHandle->pSession);
1417 }
1418 else
1419 {
1420 gvmmR0CreateDestroyUnlock(pGVMM);
1421 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1422 }
1423 }
1424 else
1425 {
1426 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1427 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1428 gvmmR0CreateDestroyUnlock(pGVMM);
1429 rc = VERR_GVMM_IPE_2;
1430 }
1431
1432 return rc;
1433}
1434
1435
1436/**
1437 * Performs VM cleanup tasks as part of object destruction.
1438 *
1439 * @param pGVM The GVM pointer.
1440 */
1441static void gvmmR0CleanupVM(PGVM pGVM)
1442{
1443 if ( pGVM->gvmm.s.fDoneVMMR0Init
1444 && !pGVM->gvmm.s.fDoneVMMR0Term)
1445 {
1446 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1447#ifdef VBOX_BUGREF_9217
1448 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM
1449#else
1450 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM
1451#endif
1452 )
1453 {
1454 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1455#ifdef VBOX_BUGREF_9217
1456 VMMR0TermVM(pGVM, pGVM, NIL_VMCPUID);
1457#else
1458 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1459#endif
1460 }
1461 else
1462#ifdef VBOX_BUGREF_9217
1463 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1464#else
1465 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1466#endif
1467 }
1468
1469 GMMR0CleanupVM(pGVM);
1470#ifdef VBOX_WITH_NEM_R0
1471 NEMR0CleanupVM(pGVM);
1472#endif
1473
1474 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1475#ifdef VBOX_BUGREF_9217
1476 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpusSafe; idCpu++)
1477#else
1478 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1479#endif
1480 {
1481 /** @todo Can we busy wait here for all thread-context hooks to be
1482 * deregistered before releasing (destroying) it? Only until we find a
1483 * solution for not deregistering hooks every time we're leaving HMR0
1484 * context. */
1485#ifdef VBOX_BUGREF_9217
1486 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1487#else
1488 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1489#endif
1490 }
1491}
1492
1493
1494/**
1495 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1496 *
1497 * pvUser1 is the GVMM instance pointer.
1498 * pvUser2 is the handle pointer.
1499 */
1500static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1501{
1502 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1503
1504 NOREF(pvObj);
1505
1506 /*
1507 * Some quick, paranoid, input validation.
1508 */
1509 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1510 AssertPtr(pHandle);
1511 PGVMM pGVMM = (PGVMM)pvUser1;
1512 Assert(pGVMM == g_pGVMM);
1513 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1514 if ( !iHandle
1515 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1516 || iHandle != pHandle->iSelf)
1517 {
1518 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1519 return;
1520 }
1521
1522 int rc = gvmmR0CreateDestroyLock(pGVMM);
1523 AssertRC(rc);
1524 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1525 AssertRC(rc);
1526
1527 /*
1528 * This is a tad slow but a doubly linked list is too much hassle.
1529 */
1530 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1531 {
1532 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1533 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1534 gvmmR0CreateDestroyUnlock(pGVMM);
1535 return;
1536 }
1537
1538 if (pGVMM->iUsedHead == iHandle)
1539 pGVMM->iUsedHead = pHandle->iNext;
1540 else
1541 {
1542 uint16_t iPrev = pGVMM->iUsedHead;
1543 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1544 while (iPrev)
1545 {
1546 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1547 {
1548 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1549 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1550 gvmmR0CreateDestroyUnlock(pGVMM);
1551 return;
1552 }
1553 if (RT_UNLIKELY(c-- <= 0))
1554 {
1555 iPrev = 0;
1556 break;
1557 }
1558
1559 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1560 break;
1561 iPrev = pGVMM->aHandles[iPrev].iNext;
1562 }
1563 if (!iPrev)
1564 {
1565 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1566 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1567 gvmmR0CreateDestroyUnlock(pGVMM);
1568 return;
1569 }
1570
1571 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1572 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1573 }
1574 pHandle->iNext = 0;
1575 pGVMM->cVMs--;
1576
1577 /*
1578 * Do the global cleanup round.
1579 */
1580 PGVM pGVM = pHandle->pGVM;
1581 if ( VALID_PTR(pGVM)
1582 && pGVM->u32Magic == GVM_MAGIC)
1583 {
1584 pGVMM->cEMTs -= pGVM->cCpus;
1585
1586 if (pGVM->pSession)
1587 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1588
1589 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1590
1591 gvmmR0CleanupVM(pGVM);
1592
1593 /*
1594 * Do the GVMM cleanup - must be done last.
1595 */
1596 /* The VM and VM pages mappings/allocations. */
1597 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1598 {
1599 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1600 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1601 }
1602
1603 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1604 {
1605 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1606 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1607 }
1608
1609 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1610 {
1611 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1612 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1613 }
1614
1615#ifndef VBOX_BUGREF_9217
1616 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1617 {
1618 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1619 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1620 }
1621#endif
1622
1623 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1624 {
1625 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1626 {
1627 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1628 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1629 }
1630#ifdef VBOX_BUGREF_9217
1631 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1632 {
1633 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1634 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1635 }
1636#endif
1637 }
1638
1639 /* the GVM structure itself. */
1640 pGVM->u32Magic |= UINT32_C(0x80000000);
1641#ifdef VBOX_BUGREF_9217
1642 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1643 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1644#else
1645 RTMemFree(pGVM);
1646#endif
1647 pGVM = NULL;
1648
1649 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1650 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1651 AssertRC(rc);
1652 }
1653 /* else: GVMMR0CreateVM cleanup. */
1654
1655 /*
1656 * Free the handle.
1657 */
1658 pHandle->iNext = pGVMM->iFreeHead;
1659 pGVMM->iFreeHead = iHandle;
1660 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1661 ASMAtomicWriteNullPtr(&pHandle->pVM);
1662 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1663 ASMAtomicWriteNullPtr(&pHandle->pSession);
1664 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1665 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1666
1667 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1668 gvmmR0CreateDestroyUnlock(pGVMM);
1669 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1670}
1671
1672
1673/**
1674 * Registers the calling thread as the EMT of a Virtual CPU.
1675 *
1676 * Note that VCPU 0 is automatically registered during VM creation.
1677 *
1678 * @returns VBox status code
1679 * @param pGVM The global (ring-0) VM structure.
1680 * @param pVM The cross context VM structure.
1681 * @param idCpu VCPU id to register the current thread as.
1682 */
1683GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1684{
1685 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1686
1687 /*
1688 * Validate the VM structure, state and handle.
1689 */
1690 PGVMM pGVMM;
1691 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1692 if (RT_SUCCESS(rc))
1693 {
1694 if (idCpu < pGVM->cCpus)
1695 {
1696 /* Check that the EMT isn't already assigned to a thread. */
1697 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1698 {
1699#ifdef VBOX_BUGREF_9217
1700 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1701#else
1702 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1703#endif
1704
1705 /* A thread may only be one EMT. */
1706 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1707 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1708 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1709 if (RT_SUCCESS(rc))
1710 {
1711 /*
1712 * Do the assignment, then try setup the hook. Undo if that fails.
1713 */
1714#ifdef VBOX_BUGREF_9217
1715 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1716
1717 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1718 if (RT_SUCCESS(rc))
1719 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1720 else
1721 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1722#else
1723 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1724
1725 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1726 if (RT_SUCCESS(rc))
1727 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1728 else
1729 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1730#endif
1731 }
1732 }
1733 else
1734 rc = VERR_ACCESS_DENIED;
1735 }
1736 else
1737 rc = VERR_INVALID_CPU_ID;
1738 }
1739 return rc;
1740}
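
/*
 * Editorial sketch, not part of the original source: how a ring-0 caller
 * might register the calling thread as the EMT of a secondary VCPU before
 * doing per-VCPU work.  The helper name gvmmR0ExampleInitEmt is a made-up
 * placeholder; real callers reach GVMMR0RegisterVCpu through VMMR0.
 *
 * @code
 *  static int gvmmR0ExampleInitEmt(PGVM pGVM, PVM pVM, VMCPUID idCpu)
 *  {
 *      // VCPU 0 is registered automatically during VM creation, so only
 *      // secondary VCPUs go through GVMMR0RegisterVCpu.
 *      int rc = GVMMR0RegisterVCpu(pGVM, pVM, idCpu);
 *      if (RT_FAILURE(rc))
 *          return rc;  // VERR_INVALID_CPU_ID, VERR_ACCESS_DENIED, ...
 *
 *      // ... per-EMT ring-0 setup would go here ...
 *      return VINF_SUCCESS;
 *  }
 * @endcode
 */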
1741
1742
1743/**
1744 * Deregisters the calling thread as the EMT of a Virtual CPU.
1745 *
1746 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1747 *
1748 * @returns VBox status code
1749 * @param pGVM The global (ring-0) VM structure.
1750 * @param pVM The cross context VM structure.
1751 * @param idCpu VCPU id of the calling EMT to deregister.
1752 */
1753GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1754{
1755 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1756
1757 /*
1758 * Validate the VM structure, state and handle.
1759 */
1760 PGVMM pGVMM;
1761 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1762 if (RT_SUCCESS(rc))
1763 {
1764 /*
1765 * Take the destruction lock and recheck the handle state to
1766 * prevent racing GVMMR0DestroyVM.
1767 */
1768 gvmmR0CreateDestroyLock(pGVMM);
1769 uint32_t hSelf = pGVM->hSelf;
1770 ASMCompilerBarrier();
1771 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1772 && pGVMM->aHandles[hSelf].pvObj != NULL
1773 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1774 {
1775 /*
1776 * Do per-EMT cleanups.
1777 */
1778#ifdef VBOX_BUGREF_9217
1779 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1780#else
1781 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1782#endif
1783
1784 /*
1785 * Invalidate hEMT. We don't use NIL here as that would allow
1786 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1787 */
1788 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1789 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1790#ifdef VBOX_BUGREF_9217
1791 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1792#else
1793 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1794#endif
1795 }
1796
1797 gvmmR0CreateDestroyUnlock(pGVMM);
1798 }
1799 return rc;
1800}
1801
1802
1803/**
1804 * Lookup a GVM structure by its handle.
1805 *
1806 * @returns The GVM pointer on success, NULL on failure.
1807 * @param hGVM The global VM handle. Asserts on bad handle.
1808 */
1809GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1810{
1811 PGVMM pGVMM;
1812 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1813
1814 /*
1815 * Validate.
1816 */
1817 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1818 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1819
1820 /*
1821 * Look it up.
1822 */
1823 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1824 AssertPtrReturn(pHandle->pVM, NULL);
1825 AssertPtrReturn(pHandle->pvObj, NULL);
1826 PGVM pGVM = pHandle->pGVM;
1827 AssertPtrReturn(pGVM, NULL);
1828#ifdef VBOX_BUGREF_9217
1829 AssertReturn(pGVM == pHandle->pVM, NULL);
1830#else
1831 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1832#endif
1833
1834 return pHandle->pGVM;
1835}
1836
1837
1838/**
1839 * Lookup a GVM structure by the shared VM structure.
1840 *
1841 * The calling thread must be in the same process as the VM. All current lookups
1842 * are by threads inside the same process, so this will not be an issue.
1843 *
1844 * @returns VBox status code.
1845 * @param pVM The cross context VM structure.
1846 * @param ppGVM Where to store the GVM pointer.
1847 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1848 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1849 * shared mode when requested.
1850 *
1851 * Be very careful if not taking the lock as it's
1852 * possible that the VM will disappear then!
1853 *
1854 * @remark This will not assert on an invalid pVM but will try to return silently.
1855 */
1856static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1857{
1858 RTPROCESS ProcId = RTProcSelf();
1859 PGVMM pGVMM;
1860 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1861
1862 /*
1863 * Validate.
1864 */
1865 if (RT_UNLIKELY( !VALID_PTR(pVM)
1866 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1867 return VERR_INVALID_POINTER;
1868 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1869 || pVM->enmVMState >= VMSTATE_TERMINATED))
1870 return VERR_INVALID_POINTER;
1871
1872 uint16_t hGVM = pVM->hSelf;
1873 ASMCompilerBarrier();
1874 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1875 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1876 return VERR_INVALID_HANDLE;
1877
1878 /*
1879 * Look it up.
1880 */
1881 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1882 PGVM pGVM;
1883 if (fTakeUsedLock)
1884 {
1885 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1886 AssertRCReturn(rc, rc);
1887
1888 pGVM = pHandle->pGVM;
1889#ifdef VBOX_BUGREF_9217
1890 if (RT_UNLIKELY( pHandle->pVM != pVM
1891 || pHandle->ProcId != ProcId
1892 || !VALID_PTR(pHandle->pvObj)
1893 || !VALID_PTR(pGVM)
1894 || pGVM != pVM))
1895#else
1896 if (RT_UNLIKELY( pHandle->pVM != pVM
1897 || pHandle->ProcId != ProcId
1898 || !VALID_PTR(pHandle->pvObj)
1899 || !VALID_PTR(pGVM)
1900 || pGVM->pVM != pVM))
1901#endif
1902 {
1903 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1904 return VERR_INVALID_HANDLE;
1905 }
1906 }
1907 else
1908 {
1909 if (RT_UNLIKELY(pHandle->pVM != pVM))
1910 return VERR_INVALID_HANDLE;
1911 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1912 return VERR_INVALID_HANDLE;
1913 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1914 return VERR_INVALID_HANDLE;
1915
1916 pGVM = pHandle->pGVM;
1917 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1918 return VERR_INVALID_HANDLE;
1919#ifdef VBOX_BUGREF_9217
1920 if (RT_UNLIKELY(pGVM != pVM))
1921#else
1922 if (RT_UNLIKELY(pGVM->pVM != pVM))
1923#endif
1924 return VERR_INVALID_HANDLE;
1925 }
1926
1927 *ppGVM = pGVM;
1928 *ppGVMM = pGVMM;
1929 return VINF_SUCCESS;
1930}
1931
1932
1933/**
1934 * Fast look up a GVM structure by the cross context VM structure.
1935 *
1936 * This is mainly used as a glue function, so performance matters.
1937 *
1938 * @returns GVM on success, NULL on failure.
1939 * @param pVM The cross context VM structure. ASSUMES to be
1940 * reasonably valid, so we can do fewer checks than in
1941 * gvmmR0ByVM.
1942 *
1943 * @note Do not use this on pVM structures from userland!
1944 */
1945GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1946{
1947 AssertPtr(pVM);
1948 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1949
1950 PGVMM pGVMM;
1951 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1952
1953 /*
1954 * Validate.
1955 */
1956 uint16_t hGVM = pVM->hSelf;
1957 ASMCompilerBarrier();
1958 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1959
1960 /*
1961 * Look it up and check pVM against the value in the handle and GVM structures.
1962 */
1963 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1964 AssertReturn(pHandle->pVM == pVM, NULL);
1965
1966 PGVM pGVM = pHandle->pGVM;
1967 AssertPtrReturn(pGVM, NULL);
1968#ifdef VBOX_BUGREF_9217
1969 AssertReturn(pGVM == pVM, NULL);
1970#else
1971 AssertReturn(pGVM->pVM == pVM, NULL);
1972#endif
1973
1974 return pGVM;
1975}
1976
1977
1978/**
1979 * Check that the given GVM and VM structures match up.
1980 *
1981 * The calling thread must be in the same process as the VM. All current lookups
1982 * are by threads inside the same process, so this will not be an issue.
1983 *
1984 * @returns VBox status code.
1985 * @param pGVM The global (ring-0) VM structure.
1986 * @param pVM The cross context VM structure.
1987 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1988 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1989 * shared mode when requested.
1990 *
1991 * Be very careful if not taking the lock as it's
1992 * possible that the VM will disappear then!
1993 *
1994 * @remark This will not assert on an invalid pVM but will try to return silently.
1995 */
1996static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1997{
1998 /*
1999 * Check the pointers.
2000 */
2001 int rc;
2002 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
2003 {
2004 if (RT_LIKELY( RT_VALID_PTR(pVM)
2005 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
2006 {
2007#ifdef VBOX_BUGREF_9217
2008 if (RT_LIKELY(pGVM == pVM))
2009#else
2010 if (RT_LIKELY(pGVM->pVM == pVM))
2011#endif
2012 {
2013 /*
2014 * Get the pGVMM instance and check the VM handle.
2015 */
2016 PGVMM pGVMM;
2017 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2018
2019 uint16_t hGVM = pGVM->hSelf;
2020 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
2021 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
2022 {
2023 RTPROCESS const pidSelf = RTProcSelf();
2024 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2025 if (fTakeUsedLock)
2026 {
2027 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2028 AssertRCReturn(rc, rc);
2029 }
2030
2031 if (RT_LIKELY( pHandle->pGVM == pGVM
2032 && pHandle->pVM == pVM
2033 && pHandle->ProcId == pidSelf
2034 && RT_VALID_PTR(pHandle->pvObj)))
2035 {
2036 /*
2037 * Some more VM data consistency checks.
2038 */
2039 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
2040 && pVM->hSelf == hGVM
2041 && pVM->enmVMState >= VMSTATE_CREATING
2042 && pVM->enmVMState <= VMSTATE_TERMINATED
2043 && pVM->pVMR0 == pVM))
2044 {
2045 *ppGVMM = pGVMM;
2046 return VINF_SUCCESS;
2047 }
2048 }
2049
2050 if (fTakeUsedLock)
2051 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2052 }
2053 }
2054 rc = VERR_INVALID_VM_HANDLE;
2055 }
2056 else
2057 rc = VERR_INVALID_POINTER;
2058 }
2059 else
2060 rc = VERR_INVALID_POINTER;
2061 return rc;
2062}
2063
2064
2065/**
2066 * Check that the given GVM and VM structures match up.
2067 *
2068 * The calling thread must be in the same process as the VM. All current lookups
2069 * are by threads inside the same process, so this will not be an issue.
2070 *
2071 * @returns VBox status code.
2072 * @param pGVM The global (ring-0) VM structure.
2073 * @param pVM The cross context VM structure.
2074 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2075 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2076 * @thread EMT
2077 *
2078 * @remarks This will assert in all failure paths.
2079 */
2080static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
2081{
2082 /*
2083 * Check the pointers.
2084 */
2085 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2086
2087 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2088 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2089#ifdef VBOX_BUGREF_9217
2090 AssertReturn(pGVM == pVM, VERR_INVALID_VM_HANDLE);
2091#else
2092 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
2093#endif
2094
2095
2096 /*
2097 * Get the pGVMM instance and check the VM handle.
2098 */
2099 PGVMM pGVMM;
2100 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2101
2102 uint16_t hGVM = pGVM->hSelf;
2103 ASMCompilerBarrier();
2104 AssertReturn( hGVM != NIL_GVM_HANDLE
2105 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2106
2107 RTPROCESS const pidSelf = RTProcSelf();
2108 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2109 AssertReturn( pHandle->pGVM == pGVM
2110 && pHandle->pVM == pVM
2111 && pHandle->ProcId == pidSelf
2112 && RT_VALID_PTR(pHandle->pvObj),
2113 VERR_INVALID_HANDLE);
2114
2115 /*
2116 * Check the EMT claim.
2117 */
2118 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2119 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2120 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2121
2122 /*
2123 * Some more VM data consistency checks.
2124 */
2125 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2126 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2127 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
2128 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
2129 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2130
2131 *ppGVMM = pGVMM;
2132 return VINF_SUCCESS;
2133}
2134
2135
2136/**
2137 * Validates a GVM/VM pair.
2138 *
2139 * @returns VBox status code.
2140 * @param pGVM The global (ring-0) VM structure.
2141 * @param pVM The cross context VM structure.
2142 */
2143GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
2144{
2145 PGVMM pGVMM;
2146 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
2147}
2148
2149
2150
2151/**
2152 * Validates a GVM/VM/EMT combo.
2153 *
2154 * @returns VBox status code.
2155 * @param pGVM The global (ring-0) VM structure.
2156 * @param pVM The cross context VM structure.
2157 * @param idCpu The Virtual CPU ID of the calling EMT.
2158 * @thread EMT(idCpu)
2159 */
2160GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2161{
2162 PGVMM pGVMM;
2163 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2164}
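
/*
 * Editorial sketch, not part of the original source: the validate-then-operate
 * pattern a VMMR0 entry point typically follows with the helpers above.  The
 * function and worker names are made-up placeholders.
 *
 * @code
 *  GVMMR0DECL(int) GVMMR0ExampleOperation(PGVM pGVM, PVM pVM, VMCPUID idCpu)
 *  {
 *      int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // Safe to touch pGVM->aCpus[idCpu] here: the calling thread has
 *          // been verified to be the EMT registered for that VCPU.
 *          rc = gvmmR0ExampleWorker(pGVM, idCpu);
 *      }
 *      return rc;
 *  }
 * @endcode
 */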
2165
2166
2167/**
2168 * Looks up the VM belonging to the specified EMT thread.
2169 *
2170 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2171 * unnecessary kernel panics when the EMT thread hits an assertion. The
2172 * caller may or may not be an EMT thread.
2173 *
2174 * @returns Pointer to the VM on success, NULL on failure.
2175 * @param hEMT The native thread handle of the EMT.
2176 * NIL_RTNATIVETHREAD means the current thread
2177 */
2178GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2179{
2180 /*
2181 * No Assertions here as we're usually called in a AssertMsgN or
2182 * RTAssert* context.
2183 */
2184 PGVMM pGVMM = g_pGVMM;
2185 if ( !VALID_PTR(pGVMM)
2186 || pGVMM->u32Magic != GVMM_MAGIC)
2187 return NULL;
2188
2189 if (hEMT == NIL_RTNATIVETHREAD)
2190 hEMT = RTThreadNativeSelf();
2191 RTPROCESS ProcId = RTProcSelf();
2192
2193 /*
2194 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2195 */
2196/** @todo introduce some pid hash table here, please. */
2197 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2198 {
2199 if ( pGVMM->aHandles[i].iSelf == i
2200 && pGVMM->aHandles[i].ProcId == ProcId
2201 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2202 && VALID_PTR(pGVMM->aHandles[i].pVM)
2203 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2204 {
2205 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2206 return pGVMM->aHandles[i].pVM;
2207
2208 /* This is fairly safe with the current process-per-VM approach. */
2209 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2210 VMCPUID const cCpus = pGVM->cCpus;
2211 ASMCompilerBarrier();
2212 if ( cCpus < 1
2213 || cCpus > VMM_MAX_CPU_COUNT)
2214 continue;
2215 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2216 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2217 return pGVMM->aHandles[i].pVM;
2218 }
2219 }
2220 return NULL;
2221}
2222
2223
2224/**
2225 * Looks up the GVMCPU belonging to the specified EMT thread.
2226 *
2227 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2228 * unnecessary kernel panics when the EMT thread hits an assertion. The
2229 * caller may or may not be an EMT thread.
2230 *
2231 * @returns Pointer to the VCPU on success, NULL on failure.
2232 * @param hEMT The native thread handle of the EMT.
2233 * NIL_RTNATIVETHREAD means the current thread
2234 */
2235GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2236{
2237 /*
2238 * No Assertions here as we're usually called in a AssertMsgN,
2239 * RTAssert*, Log and LogRel contexts.
2240 */
2241 PGVMM pGVMM = g_pGVMM;
2242 if ( !VALID_PTR(pGVMM)
2243 || pGVMM->u32Magic != GVMM_MAGIC)
2244 return NULL;
2245
2246 if (hEMT == NIL_RTNATIVETHREAD)
2247 hEMT = RTThreadNativeSelf();
2248 RTPROCESS ProcId = RTProcSelf();
2249
2250 /*
2251 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2252 */
2253/** @todo introduce some pid hash table here, please. */
2254 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2255 {
2256 if ( pGVMM->aHandles[i].iSelf == i
2257 && pGVMM->aHandles[i].ProcId == ProcId
2258 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2259 && VALID_PTR(pGVMM->aHandles[i].pVM)
2260 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2261 {
2262 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2263 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2264 return &pGVM->aCpus[0];
2265
2266 /* This is fairly safe with the current process-per-VM approach. */
2267 VMCPUID const cCpus = pGVM->cCpus;
2268 ASMCompilerBarrier();
2269 ASMCompilerBarrier();
2270 if ( cCpus < 1
2271 || cCpus > VMM_MAX_CPU_COUNT)
2272 continue;
2273 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2274 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2275 return &pGVM->aCpus[idCpu];
2276 }
2277 }
2278 return NULL;
2279}
2280
2281
2282/**
2283 * This will wake up expired and soon-to-be-expired VMs.
2284 *
2285 * @returns Number of VMs that have been woken up.
2286 * @param pGVMM Pointer to the GVMM instance data.
2287 * @param u64Now The current time.
2288 */
2289static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2290{
2291 /*
2292 * Skip this if we have been disabled, either because of high resolution
2293 * wake-ups or by the user.
2294 */
2295 if (!pGVMM->fDoEarlyWakeUps)
2296 return 0;
2297
2298/** @todo Rewrite this algorithm. See performance defect XYZ. */
2299
2300 /*
2301 * A cheap optimization to stop wasting so much time here on big setups.
2302 */
2303 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2304 if ( pGVMM->cHaltedEMTs == 0
2305 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2306 return 0;
2307
2308 /*
2309 * Only one thread doing this at a time.
2310 */
2311 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2312 return 0;
2313
2314 /*
2315 * The first pass will wake up VMs which have actually expired
2316 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2317 */
2318 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2319 uint64_t u64Min = UINT64_MAX;
2320 unsigned cWoken = 0;
2321 unsigned cHalted = 0;
2322 unsigned cTodo2nd = 0;
2323 unsigned cTodo3rd = 0;
2324 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2325 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2326 i = pGVMM->aHandles[i].iNext)
2327 {
2328 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2329 if ( VALID_PTR(pCurGVM)
2330 && pCurGVM->u32Magic == GVM_MAGIC)
2331 {
2332 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2333 {
2334 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2335 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2336 if (u64)
2337 {
2338 if (u64 <= u64Now)
2339 {
2340 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2341 {
2342 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2343 AssertRC(rc);
2344 cWoken++;
2345 }
2346 }
2347 else
2348 {
2349 cHalted++;
2350 if (u64 <= uNsEarlyWakeUp1)
2351 cTodo2nd++;
2352 else if (u64 <= uNsEarlyWakeUp2)
2353 cTodo3rd++;
2354 else if (u64 < u64Min)
2355 u64Min = u64; /* track the earliest future expiry */
2356 }
2357 }
2358 }
2359 }
2360 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2361 }
2362
2363 if (cTodo2nd)
2364 {
2365 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2366 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2367 i = pGVMM->aHandles[i].iNext)
2368 {
2369 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2370 if ( VALID_PTR(pCurGVM)
2371 && pCurGVM->u32Magic == GVM_MAGIC)
2372 {
2373 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2374 {
2375 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2376 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2377 if ( u64
2378 && u64 <= uNsEarlyWakeUp1)
2379 {
2380 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2381 {
2382 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2383 AssertRC(rc);
2384 cWoken++;
2385 }
2386 }
2387 }
2388 }
2389 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2390 }
2391 }
2392
2393 if (cTodo3rd)
2394 {
2395 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2396 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2397 i = pGVMM->aHandles[i].iNext)
2398 {
2399 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2400 if ( VALID_PTR(pCurGVM)
2401 && pCurGVM->u32Magic == GVM_MAGIC)
2402 {
2403 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2404 {
2405 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2406 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2407 if ( u64
2408 && u64 <= uNsEarlyWakeUp2)
2409 {
2410 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2411 {
2412 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2413 AssertRC(rc);
2414 cWoken++;
2415 }
2416 }
2417 }
2418 }
2419 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2420 }
2421 }
2422
2423 /*
2424 * Set the minimum value.
2425 */
2426 pGVMM->uNsNextEmtWakeup = u64Min;
2427
2428 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2429 return cWoken;
2430}
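
/*
 * Editorial note, not part of the original source: a worked example of the
 * three wake-up passes above, assuming nsEarlyWakeUp1 = 25us and
 * nsEarlyWakeUp2 = 50us (illustrative values only).
 *
 * @code
 *  // u64Now          = 1 000 000 000 ns  (current GIP time)
 *  // uNsEarlyWakeUp1 = 1 000 025 000 ns  (= u64Now + nsEarlyWakeUp1)
 *  // uNsEarlyWakeUp2 = 1 000 050 000 ns  (= u64Now + nsEarlyWakeUp2)
 *  //
 *  // Pass 1: u64HaltExpire <= u64Now          -> already expired, wake now.
 *  // Pass 2: u64HaltExpire <= uNsEarlyWakeUp1 -> expires within 25us, wake early.
 *  // Pass 3: u64HaltExpire <= uNsEarlyWakeUp2 -> expires within 50us, wake early.
 *  // Later expiries only feed the u64Min / uNsNextEmtWakeup bookkeeping.
 * @endcode
 */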
2431
2432
2433/**
2434 * Halt the EMT thread.
2435 *
2436 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2437 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2438 * @param pGVM The global (ring-0) VM structure.
2439 * @param pVM The cross context VM structure.
2440 * @param pGVCpu The global (ring-0) CPU structure of the calling
2441 * EMT.
2442 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2443 * @thread EMT(pGVCpu).
2444 */
2445GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2446{
2447 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2448 pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2449 GVMM_CHECK_SMAP_SETUP();
2450 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2451
2452 PGVMM pGVMM;
2453 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2454
2455 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2456 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2457
2458 /*
2459 * If we're doing early wake-ups, we must take the UsedList lock before we
2460 * start querying the current time.
2461 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2462 */
2463 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2464 if (fDoEarlyWakeUps)
2465 {
2466 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2467 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2468 }
2469
2470 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2471
2472 /* GIP hack: We may be sleeping frequently for short intervals where the
2473 difference between GIP and system time matters on systems with high resolution
2474 system time. So, convert the input from GIP to system time in that case. */
2475 Assert(ASMGetFlags() & X86_EFL_IF);
2476 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2477 const uint64_t u64NowGip = RTTimeNanoTS();
2478 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2479
2480 if (fDoEarlyWakeUps)
2481 {
2482 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2483 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2484 }
2485
2486 /*
2487 * Go to sleep if we must...
2488 * Cap the sleep time to 1 second to be on the safe side.
2489 */
2490 int rc;
2491 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2492 if ( u64NowGip < u64ExpireGipTime
2493 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2494 ? pGVMM->nsMinSleepCompany
2495 : pGVMM->nsMinSleepAlone))
2496 {
2497 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2498 if (cNsInterval > RT_NS_1SEC)
2499 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2500 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2501 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2502 if (fDoEarlyWakeUps)
2503 {
2504 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2505 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2506 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2507 }
2508 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2509
2510 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2511 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2512 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2513 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2514
2515 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2516 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2517
2518 /* Reset the semaphore to try prevent a few false wake-ups. */
2519 if (rc == VINF_SUCCESS)
2520 {
2521 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2522 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2523 }
2524 else if (rc == VERR_TIMEOUT)
2525 {
2526 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2527 rc = VINF_SUCCESS;
2528 }
2529 }
2530 else
2531 {
2532 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2533 if (fDoEarlyWakeUps)
2534 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2535 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2536 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2537 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2538 rc = VINF_SUCCESS;
2539 }
2540
2541 return rc;
2542}
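
/*
 * Editorial sketch, not part of the original source: blocking the calling EMT
 * for up to one millisecond using the halt API above.  The wrapper name and
 * the 1 ms figure are illustrative; real callers get their wake-up deadline
 * from TM/EM and enter through VMMR0.
 *
 * @code
 *  static int gvmmR0ExampleHaltOneMs(PGVM pGVM, PVM pVM, VMCPUID idCpu)
 *  {
 *      // The expiry is an absolute GIP timestamp; GVMMR0SchedHalt caps the
 *      // actual sleep at one second as a safety measure.
 *      uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS;
 *      return GVMMR0SchedHaltReq(pGVM, pVM, idCpu, u64ExpireGipTime);
 *  }
 * @endcode
 */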
2543
2544
2545/**
2546 * Halt the EMT thread.
2547 *
2548 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2549 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2550 * @param pGVM The global (ring-0) VM structure.
2551 * @param pVM The cross context VM structure.
2552 * @param idCpu The Virtual CPU ID of the calling EMT.
2553 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2554 * @thread EMT(idCpu).
2555 */
2556GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2557{
2558 GVMM_CHECK_SMAP_SETUP();
2559 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2560 PGVMM pGVMM;
2561 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2562 if (RT_SUCCESS(rc))
2563 {
2564 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2565 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2566 }
2567 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2568 return rc;
2569}
2570
2571
2572
2573/**
2574 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2575 * a sleeping EMT.
2576 *
2577 * @retval VINF_SUCCESS if successfully woken up.
2578 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2579 *
2580 * @param pGVM The global (ring-0) VM structure.
2581 * @param pGVCpu The global (ring-0) VCPU structure.
2582 */
2583DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2584{
2585 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2586
2587 /*
2588 * Signal the semaphore regardless of whether it's currently blocked on it.
2589 *
2590 * The reason for this is that there is absolutely no way we can be 100%
2591 * certain that it isn't *about* to go to sleep on it and just got
2592 * delayed a bit en route. So, we will always signal the semaphore when
2593 * it is flagged as halted in the VMM.
2594 */
2595/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2596 int rc;
2597 if (pGVCpu->gvmm.s.u64HaltExpire)
2598 {
2599 rc = VINF_SUCCESS;
2600 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2601 }
2602 else
2603 {
2604 rc = VINF_GVM_NOT_BLOCKED;
2605 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2606 }
2607
2608 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2609 AssertRC(rc2);
2610
2611 return rc;
2612}
2613
2614
2615/**
2616 * Wakes up the halted EMT thread so it can service a pending request.
2617 *
2618 * @returns VBox status code.
2619 * @retval VINF_SUCCESS if successfully woken up.
2620 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2621 *
2622 * @param pGVM The global (ring-0) VM structure.
2623 * @param pVM The cross context VM structure.
2624 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2625 * @param fTakeUsedLock Take the used lock or not
2626 * @thread Any but EMT(idCpu).
2627 */
2628GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2629{
2630 GVMM_CHECK_SMAP_SETUP();
2631 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2632
2633 /*
2634 * Validate input and take the UsedLock.
2635 */
2636 PGVMM pGVMM;
2637 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2638 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2639 if (RT_SUCCESS(rc))
2640 {
2641 if (idCpu < pGVM->cCpus)
2642 {
2643 /*
2644 * Do the actual job.
2645 */
2646 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2647 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2648
2649 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2650 {
2651 /*
2652 * While we're here, do a round of scheduling.
2653 */
2654 Assert(ASMGetFlags() & X86_EFL_IF);
2655 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2656 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2657 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2658 }
2659 }
2660 else
2661 rc = VERR_INVALID_CPU_ID;
2662
2663 if (fTakeUsedLock)
2664 {
2665 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2666 AssertRC(rc2);
2667 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2668 }
2669 }
2670
2671 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2672 return rc;
2673}
2674
2675
2676/**
2677 * Wakes up the halted EMT thread so it can service a pending request.
2678 *
2679 * @returns VBox status code.
2680 * @retval VINF_SUCCESS if successfully woken up.
2681 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2682 *
2683 * @param pGVM The global (ring-0) VM structure.
2684 * @param pVM The cross context VM structure.
2685 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2686 * @thread Any but EMT(idCpu).
2687 */
2688GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2689{
2690 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2691}
2692
2693
2694/**
2695 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2696 * parameter and no used locking.
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS if successfully woken up.
2700 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2701 *
2702 * @param pVM The cross context VM structure.
2703 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2704 * @thread Any but EMT(idCpu).
2705 * @deprecated Don't use in new code if possible! Use the GVM variant.
2706 */
2707GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2708{
2709 GVMM_CHECK_SMAP_SETUP();
2710 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2711 PGVM pGVM;
2712 PGVMM pGVMM;
2713 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2714 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2715 if (RT_SUCCESS(rc))
2716 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2717 return rc;
2718}
2719
2720
2721/**
2722 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2723 * the Virtual CPU if it's still busy executing guest code.
2724 *
2725 * @returns VBox status code.
2726 * @retval VINF_SUCCESS if poked successfully.
2727 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2728 *
2729 * @param pGVM The global (ring-0) VM structure.
2730 * @param pVCpu The cross context virtual CPU structure.
2731 */
2732DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2733{
2734 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2735
2736 RTCPUID idHostCpu = pVCpu->idHostCpu;
2737 if ( idHostCpu == NIL_RTCPUID
2738 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2739 {
2740 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2741 return VINF_GVM_NOT_BUSY_IN_GC;
2742 }
2743
2744 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2745 RTMpPokeCpu(idHostCpu);
2746 return VINF_SUCCESS;
2747}
2748
2749
2750/**
2751 * Pokes an EMT if it's still busy running guest code.
2752 *
2753 * @returns VBox status code.
2754 * @retval VINF_SUCCESS if poked successfully.
2755 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2756 *
2757 * @param pGVM The global (ring-0) VM structure.
2758 * @param pVM The cross context VM structure.
2759 * @param idCpu The ID of the virtual CPU to poke.
2760 * @param fTakeUsedLock Take the used lock or not
2761 */
2762GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2763{
2764 /*
2765 * Validate input and take the UsedLock.
2766 */
2767 PGVMM pGVMM;
2768 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2769 if (RT_SUCCESS(rc))
2770 {
2771 if (idCpu < pGVM->cCpus)
2772#ifdef VBOX_BUGREF_9217
2773 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2774#else
2775 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2776#endif
2777 else
2778 rc = VERR_INVALID_CPU_ID;
2779
2780 if (fTakeUsedLock)
2781 {
2782 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2783 AssertRC(rc2);
2784 }
2785 }
2786
2787 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2788 return rc;
2789}
2790
2791
2792/**
2793 * Pokes an EMT if it's still busy running guest code.
2794 *
2795 * @returns VBox status code.
2796 * @retval VINF_SUCCESS if poked successfully.
2797 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2798 *
2799 * @param pGVM The global (ring-0) VM structure.
2800 * @param pVM The cross context VM structure.
2801 * @param idCpu The ID of the virtual CPU to poke.
2802 */
2803GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2804{
2805 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2806}
2807
2808
2809/**
2810 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2811 * used locking.
2812 *
2813 * @returns VBox status code.
2814 * @retval VINF_SUCCESS if poked successfully.
2815 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2816 *
2817 * @param pVM The cross context VM structure.
2818 * @param idCpu The ID of the virtual CPU to poke.
2819 *
2820 * @deprecated Don't use in new code if possible! Use the GVM variant.
2821 */
2822GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2823{
2824 PGVM pGVM;
2825 PGVMM pGVMM;
2826 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2827 if (RT_SUCCESS(rc))
2828 {
2829 if (idCpu < pGVM->cCpus)
2830#ifdef VBOX_BUGREF_9217
2831 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2832#else
2833 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2834#endif
2835 else
2836 rc = VERR_INVALID_CPU_ID;
2837 }
2838 return rc;
2839}
2840
2841
2842/**
2843 * Wakes up a set of halted EMT threads so they can service pending requests.
2844 *
2845 * @returns VBox status code, no informational stuff.
2846 *
2847 * @param pGVM The global (ring-0) VM structure.
2848 * @param pVM The cross context VM structure.
2849 * @param pSleepSet The set of sleepers to wake up.
2850 * @param pPokeSet The set of CPUs to poke.
2851 */
2852GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2853{
2854 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2855 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2856 GVMM_CHECK_SMAP_SETUP();
2857 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2858 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2859
2860 /*
2861 * Validate input and take the UsedLock.
2862 */
2863 PGVMM pGVMM;
2864 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2865 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2866 if (RT_SUCCESS(rc))
2867 {
2868 rc = VINF_SUCCESS;
2869 VMCPUID idCpu = pGVM->cCpus;
2870 while (idCpu-- > 0)
2871 {
2872 /* Don't try to poke or wake up ourselves. */
2873 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2874 continue;
2875
2876 /* just ignore errors for now. */
2877 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2878 {
2879 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2880 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2881 }
2882 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2883 {
2884#ifdef VBOX_BUGREF_9217
2885 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2886#else
2887 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2888#endif
2889 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2890 }
2891 }
2892
2893 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2894 AssertRC(rc2);
2895 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2896 }
2897
2898 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2899 return rc;
2900}
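
/*
 * Editorial sketch, not part of the original source: waking a halted VCPU 1
 * and poking a busy VCPU 2 in a single call.  The VMCPUSET_EMPTY/VMCPUSET_ADD
 * macros are assumed from VBox/vmm/vmcpuset.h; the function name is a
 * placeholder.
 *
 * @code
 *  static int gvmmR0ExampleWakeAndPoke(PGVM pGVM, PVM pVM)
 *  {
 *      VMCPUSET SleepSet;
 *      VMCPUSET PokeSet;
 *      VMCPUSET_EMPTY(&SleepSet);
 *      VMCPUSET_EMPTY(&PokeSet);
 *      VMCPUSET_ADD(&SleepSet, 1);   // wake VCPU 1 if it is halted
 *      VMCPUSET_ADD(&PokeSet, 2);    // poke VCPU 2 if it is executing guest code
 *      return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &SleepSet, &PokeSet);
 *  }
 * @endcode
 */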
2901
2902
2903/**
2904 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2905 *
2906 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2907 * @param pGVM The global (ring-0) VM structure.
2908 * @param pVM The cross context VM structure.
2909 * @param pReq Pointer to the request packet.
2910 */
2911GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2912{
2913 /*
2914 * Validate input and pass it on.
2915 */
2916 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2917 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2918
2919 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2920}
2921
2922
2923
2924/**
2925 * Poll the schedule to see if someone else should get a chance to run.
2926 *
2927 * This is a bit hackish and will not work too well if the machine is
2928 * under heavy load from non-VM processes.
2929 *
2930 * @returns VINF_SUCCESS if not yielded.
2931 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2932 * @param pGVM The global (ring-0) VM structure.
2933 * @param pVM The cross context VM structure.
2934 * @param idCpu The Virtual CPU ID of the calling EMT.
2935 * @param fYield Whether to yield or not.
2936 * This is for when we're spinning in the halt loop.
2937 * @thread EMT(idCpu).
2938 */
2939GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2940{
2941 /*
2942 * Validate input.
2943 */
2944 PGVMM pGVMM;
2945 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2946 if (RT_SUCCESS(rc))
2947 {
2948 /*
2949 * We currently only implement helping doing wakeups (fYield = false), so don't
2950 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2951 */
2952 if (!fYield && pGVMM->fDoEarlyWakeUps)
2953 {
2954 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2955 pGVM->gvmm.s.StatsSched.cPollCalls++;
2956
2957 Assert(ASMGetFlags() & X86_EFL_IF);
2958 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2959
2960 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2961
2962 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2963 }
2964 /*
2965 * Not quite sure what we could do here...
2966 */
2967 else if (fYield)
2968 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2969 else
2970 rc = VINF_SUCCESS;
2971 }
2972
2973 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2974 return rc;
2975}
2976
2977
2978#ifdef GVMM_SCHED_WITH_PPT
2979/**
2980 * Timer callback for the periodic preemption timer.
2981 *
2982 * @param pTimer The timer handle.
2983 * @param pvUser Pointer to the per cpu structure.
2984 * @param iTick The current tick.
2985 */
2986static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2987{
2988 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2989 NOREF(pTimer); NOREF(iTick);
2990
2991 /*
2992 * Termination check
2993 */
2994 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2995 return;
2996
2997 /*
2998 * Do the house keeping.
2999 */
3000 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3001
3002 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3003 {
3004 /*
3005 * Historicize the max frequency.
3006 */
3007 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3008 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3009 pCpu->Ppt.iTickHistorization = 0;
3010 pCpu->Ppt.uDesiredHz = 0;
3011
3012 /*
3013 * Check whether the current timer frequency still matches the recent demand.
3014 */
3015 uint32_t uHistMaxHz = 0;
3016 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3017 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3018 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3019 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3020 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3021 else if (uHistMaxHz)
3022 {
3023 /*
3024 * Reprogram it.
3025 */
3026 pCpu->Ppt.cChanges++;
3027 pCpu->Ppt.iTickHistorization = 0;
3028 pCpu->Ppt.uTimerHz = uHistMaxHz;
3029 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3030 pCpu->Ppt.cNsInterval = cNsInterval;
3031 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3032 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3033 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3034 / cNsInterval;
3035 else
3036 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3037 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3038
3039 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3040 RTTimerChangeInterval(pTimer, cNsInterval);
3041 }
3042 else
3043 {
3044 /*
3045 * Stop it.
3046 */
3047 pCpu->Ppt.fStarted = false;
3048 pCpu->Ppt.uTimerHz = 0;
3049 pCpu->Ppt.cNsInterval = 0;
3050 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3051
3052 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3053 RTTimerStop(pTimer);
3054 }
3055 }
3056 else
3057 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3058}
3059#endif /* GVMM_SCHED_WITH_PPT */
3060
3061
3062/**
3063 * Updates the periodic preemption timer for the calling CPU.
3064 *
3065 * The caller must have disabled preemption!
3066 * The caller must check that the host can do high resolution timers.
3067 *
3068 * @param pVM The cross context VM structure.
3069 * @param idHostCpu The current host CPU id.
3070 * @param uHz The desired frequency.
3071 */
3072GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
3073{
3074 NOREF(pVM);
3075#ifdef GVMM_SCHED_WITH_PPT
3076 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3077 Assert(RTTimerCanDoHighResolution());
3078
3079 /*
3080 * Resolve the per CPU data.
3081 */
3082 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3083 PGVMM pGVMM = g_pGVMM;
3084 if ( !VALID_PTR(pGVMM)
3085 || pGVMM->u32Magic != GVMM_MAGIC)
3086 return;
3087 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3088 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3089 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3090 && pCpu->idCpu == idHostCpu,
3091 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3092
3093 /*
3094 * Check whether we need to do anything about the timer.
3095 * We have to be a little bit careful since we might be racing the timer
3096 * callback here.
3097 */
3098 if (uHz > 16384)
3099 uHz = 16384; /** @todo add a query method for this! */
3100 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3101 && uHz >= pCpu->Ppt.uMinHz
3102 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3103 {
3104 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3105
3106 pCpu->Ppt.uDesiredHz = uHz;
3107 uint32_t cNsInterval = 0;
3108 if (!pCpu->Ppt.fStarted)
3109 {
3110 pCpu->Ppt.cStarts++;
3111 pCpu->Ppt.fStarted = true;
3112 pCpu->Ppt.fStarting = true;
3113 pCpu->Ppt.iTickHistorization = 0;
3114 pCpu->Ppt.uTimerHz = uHz;
3115 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3116 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3117 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3118 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3119 / cNsInterval;
3120 else
3121 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3122 }
3123
3124 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3125
3126 if (cNsInterval)
3127 {
3128 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3129 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3130 AssertRC(rc);
3131
3132 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3133 if (RT_FAILURE(rc))
3134 pCpu->Ppt.fStarted = false;
3135 pCpu->Ppt.fStarting = false;
3136 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3137 }
3138 }
3139#else /* !GVMM_SCHED_WITH_PPT */
3140 NOREF(idHostCpu); NOREF(uHz);
3141#endif /* !GVMM_SCHED_WITH_PPT */
3142}
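
/*
 * Editorial note, not part of the original source: a worked example of the
 * interval math above for uHz = 2000, assuming GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 * is 20 ms (illustrative value only).
 *
 * @code
 *  // cNsInterval = RT_NS_1SEC / 2000              = 500 000 ns (0.5 ms per tick)
 *  // cTicksHistoriziationInterval
 *  //   = (20 000 000 + 20 000 000 / 2 - 1) / 500 000
 *  //   = 29 999 999 / 500 000                     = 59 ticks
 *  // i.e. the max-frequency history is advanced about every 59 ticks (~30 ms)
 *  // when the timer runs at 2000 Hz.
 * @endcode
 */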
3143
3144
3145/**
3146 * Retrieves the GVMM statistics visible to the caller.
3147 *
3148 * @returns VBox status code.
3149 *
3150 * @param pStats Where to put the statistics.
3151 * @param pSession The current session.
3152 * @param pGVM The GVM to obtain statistics for. Optional.
3153 * @param pVM The VM structure corresponding to @a pGVM.
3154 */
3155GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3156{
3157 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3158
3159 /*
3160 * Validate input.
3161 */
3162 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3163 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3164 pStats->cVMs = 0; /* (crash before taking the sem...) */
3165
3166 /*
3167 * Take the lock and get the VM statistics.
3168 */
3169 PGVMM pGVMM;
3170 if (pGVM)
3171 {
3172 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3173 if (RT_FAILURE(rc))
3174 return rc;
3175 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3176 }
3177 else
3178 {
3179 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3180 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
3181
3182 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3183 AssertRCReturn(rc, rc);
3184 }
3185
3186 /*
3187 * Enumerate the VMs and add the ones visible to the statistics.
3188 */
3189 pStats->cVMs = 0;
3190 pStats->cEMTs = 0;
3191 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3192
3193 for (unsigned i = pGVMM->iUsedHead;
3194 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3195 i = pGVMM->aHandles[i].iNext)
3196 {
3197 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3198 void *pvObj = pGVMM->aHandles[i].pvObj;
3199 if ( VALID_PTR(pvObj)
3200 && VALID_PTR(pOtherGVM)
3201 && pOtherGVM->u32Magic == GVM_MAGIC
3202 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3203 {
3204 pStats->cVMs++;
3205 pStats->cEMTs += pOtherGVM->cCpus;
3206
3207 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3208 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3209 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3210 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3211 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3212
3213 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3214 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3215 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3216
3217 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3218 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3219
3220 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3221 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3222 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3223 }
3224 }
3225
3226 /*
3227 * Copy out the per host CPU statistics.
3228 */
3229 uint32_t iDstCpu = 0;
3230 uint32_t cSrcCpus = pGVMM->cHostCpus;
3231 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3232 {
3233 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3234 {
3235 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3236 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3237#ifdef GVMM_SCHED_WITH_PPT
3238 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3239 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3240 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3241 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3242#else
3243 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3244 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3245 pStats->aHostCpus[iDstCpu].cChanges = 0;
3246 pStats->aHostCpus[iDstCpu].cStarts = 0;
3247#endif
3248 iDstCpu++;
3249 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3250 break;
3251 }
3252 }
3253 pStats->cHostCpus = iDstCpu;
3254
3255 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3256
3257 return VINF_SUCCESS;
3258}
3259
3260
3261/**
3262 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3263 *
3264 * @returns see GVMMR0QueryStatistics.
3265 * @param pGVM The global (ring-0) VM structure. Optional.
3266 * @param pVM The cross context VM structure. Optional.
3267 * @param pReq Pointer to the request packet.
3268 * @param pSession The current session.
3269 */
3270GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3271{
3272 /*
3273 * Validate input and pass it on.
3274 */
3275 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3276 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3277 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3278
3279 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
3280}
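
/*
 * Editorial sketch, not part of the original source: filling in the request
 * packet consumed by the wrapper above.  Ring-3 normally submits this through
 * the support driver; the direct setup shown here is illustrative only.
 *
 * @code
 *  GVMMQUERYSTATISTICSSREQ Req;
 *  RT_ZERO(Req);
 *  Req.Hdr.cbReq = sizeof(Req);        // size checked by the wrapper
 *  Req.pSession  = pSession;           // must match the calling session
 *  int rc = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
 *  // On success, Req.Stats holds the per-VM and summed scheduler statistics.
 * @endcode
 */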
3281
3282
3283/**
3284 * Resets the specified GVMM statistics.
3285 *
3286 * @returns VBox status code.
3287 *
3288 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
3289 * @param pSession The current session.
3290 * @param pGVM The GVM to reset statistics for. Optional.
3291 * @param pVM The VM structure corresponding to @a pGVM.
3292 */
3293GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3294{
3295 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3296
3297 /*
3298 * Validate input.
3299 */
3300 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3301 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3302
3303 /*
3304 * Take the lock and get the VM statistics.
3305 */
3306 PGVMM pGVMM;
3307 if (pGVM)
3308 {
3309 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3310 if (RT_FAILURE(rc))
3311 return rc;
3312# define MAYBE_RESET_FIELD(field) \
3313 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3314 MAYBE_RESET_FIELD(cHaltCalls);
3315 MAYBE_RESET_FIELD(cHaltBlocking);
3316 MAYBE_RESET_FIELD(cHaltTimeouts);
3317 MAYBE_RESET_FIELD(cHaltNotBlocking);
3318 MAYBE_RESET_FIELD(cHaltWakeUps);
3319 MAYBE_RESET_FIELD(cWakeUpCalls);
3320 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3321 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3322 MAYBE_RESET_FIELD(cPokeCalls);
3323 MAYBE_RESET_FIELD(cPokeNotBusy);
3324 MAYBE_RESET_FIELD(cPollCalls);
3325 MAYBE_RESET_FIELD(cPollHalts);
3326 MAYBE_RESET_FIELD(cPollWakeUps);
3327# undef MAYBE_RESET_FIELD
3328 }
3329 else
3330 {
3331 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3332
3333 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3334 AssertRCReturn(rc, rc);
3335 }
3336
3337 /*
3338 * Enumerate the VMs and add the ones visible to the statistics.
3339 */
3340 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3341 {
3342 for (unsigned i = pGVMM->iUsedHead;
3343 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3344 i = pGVMM->aHandles[i].iNext)
3345 {
3346 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3347 void *pvObj = pGVMM->aHandles[i].pvObj;
3348 if ( VALID_PTR(pvObj)
3349 && VALID_PTR(pOtherGVM)
3350 && pOtherGVM->u32Magic == GVM_MAGIC
3351 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3352 {
3353# define MAYBE_RESET_FIELD(field) \
3354 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3355 MAYBE_RESET_FIELD(cHaltCalls);
3356 MAYBE_RESET_FIELD(cHaltBlocking);
3357 MAYBE_RESET_FIELD(cHaltTimeouts);
3358 MAYBE_RESET_FIELD(cHaltNotBlocking);
3359 MAYBE_RESET_FIELD(cHaltWakeUps);
3360 MAYBE_RESET_FIELD(cWakeUpCalls);
3361 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3362 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3363 MAYBE_RESET_FIELD(cPokeCalls);
3364 MAYBE_RESET_FIELD(cPokeNotBusy);
3365 MAYBE_RESET_FIELD(cPollCalls);
3366 MAYBE_RESET_FIELD(cPollHalts);
3367 MAYBE_RESET_FIELD(cPollWakeUps);
3368# undef MAYBE_RESET_FIELD
3369 }
3370 }
3371 }
3372
3373 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3374
3375 return VINF_SUCCESS;
3376}
3377
3378
3379/**
3380 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3381 *
3382 * @returns see GVMMR0ResetStatistics.
3383 * @param pGVM The global (ring-0) VM structure. Optional.
3384 * @param pVM The cross context VM structure. Optional.
3385 * @param pReq Pointer to the request packet.
3386 * @param pSession The current session.
3387 */
3388GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3389{
3390 /*
3391 * Validate input and pass it on.
3392 */
3393 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3394 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3395 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3396
3397 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3398}
3399