VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 86699

Last change on this file since 86699 was 86683, checked in by vboxsync, 4 years ago

VMM/DBGF: Missing tracer destruction in R0 when the VM terminates, bugref:9210

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 106.4 KB
Line 
1/* $Id: GVMMR0.cpp 86683 2020-10-23 09:06:16Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer (PPT).
 * It is only defined here for Linux and Solaris hosts (and when generating the
 * doxygen documentation); all PPT code in this file is guarded by it. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** @def GVMM_CHECK_SMAP_SETUP
101 * SMAP check setup.
 * Declares the local constant fKernelFeatures which GVMM_CHECK_SMAP_CHECK and
 * GVMM_CHECK_SMAP_CHECK2 read, so it must precede them in the same scope. */
102/** @def GVMM_CHECK_SMAP_CHECK
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
104 * it will be logged (SUPR0Printf) and @a a_BadExpr is executed. */
105/** @def GVMM_CHECK_SMAP_CHECK2
106 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
107 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
108 * executed.
 * The reporting is done through SUPR0BadContext() using the session of
 * @a a_pGVM, or NULL when @a a_pGVM is NULL. */
/* Note: The '|| 1' below makes the first operand always true, so the real
 * checks are compiled in unless VBOX_WITH_RAM_IN_KERNEL is defined.  In the
 * latter case fKernelFeatures is zero and the check macros only reference it
 * via NOREF. */
109#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
110# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
111# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
112 do { \
113 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
114 { \
115 RTCCUINTREG fEflCheck = ASMGetFlags(); \
116 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
117 { /* likely */ } \
118 else \
119 { \
120 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
121 a_BadExpr; \
122 } \
123 } \
124 } while (0)
125# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
126 do { \
127 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
128 { \
129 RTCCUINTREG fEflCheck = ASMGetFlags(); \
130 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
131 { /* likely */ } \
132 else \
133 { \
134 SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
135 a_BadExpr; \
136 } \
137 } \
138 } while (0)
139#else
140# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
141# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
142# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
143#endif
144
145
146
147/*********************************************************************************************************************************
148* Structures and Typedefs *
149*********************************************************************************************************************************/
150
151/**
152 * Global VM handle.
 *
 * One entry of the GVMM::aHandles array.  Entries are chained through iNext
 * into either the free list (GVMM::iFreeHead) or the used list
 * (GVMM::iUsedHead).
153 */
154typedef struct GVMHANDLE
155{
156 /** The index of the next handle in the list (free or used). (0 is nil.) */
157 uint16_t volatile iNext;
158 /** Our own index / handle value. */
159 uint16_t iSelf;
160 /** The process ID of the handle owner.
161 * This is used for access checks. */
162 RTPROCESS ProcId;
163 /** The pointer to the ring-0 only (aka global) VM structure. */
164 PGVM pGVM;
165 /** The virtual machine object (as returned by SUPR0ObjRegister in
 * GVMMR0CreateVM). */
166 void *pvObj;
167 /** The session this VM is associated with. */
168 PSUPDRVSESSION pSession;
169 /** The ring-0 handle of the EMT0 thread.
170 * This is used for ownership checks as well as looking up a VM handle by thread
171 * at times like assertions. */
172 RTNATIVETHREAD hEMT0;
173} GVMHANDLE;
174/** Pointer to a global VM handle. */
175typedef GVMHANDLE *PGVMHANDLE;
176
177/** Number of GVM handles (including the NIL handle).
 * This is the size of GVMM::aHandles; it is larger on 64-bit hosts. */
178#if HC_ARCH_BITS == 64
179# define GVMM_MAX_HANDLES 8192
180#else
181# define GVMM_MAX_HANDLES 128
182#endif
183
184/**
185 * Per host CPU GVMM data.
 *
 * One entry per host CPU set index, see GVMM::aHostCpus.
186 */
187typedef struct GVMMHOSTCPU
188{
189 /** Magic number (GVMMHOSTCPU_MAGIC).
 * GVMMR0Init sets this to zero for set indexes that are not in the CPU set
 * returned by RTMpGetSet, and GVMMR0Term writes the inverted magic. */
190 uint32_t volatile u32Magic;
191 /** The CPU ID (NIL_RTCPUID when the set index is not in the CPU set). */
192 RTCPUID idCpu;
193 /** The CPU set index. */
194 uint32_t idxCpuSet;
195
196#ifdef GVMM_SCHED_WITH_PPT
197 /** Periodic preemption timer data. */
198 struct
199 {
200 /** The handle to the periodic preemption timer. */
201 PRTTIMER pTimer;
202 /** Spinlock protecting the data below. */
203 RTSPINLOCK hSpinlock;
204 /** The smallest Hz that we need to care about. (static) */
205 uint32_t uMinHz;
206 /** The number of ticks between each historization. */
207 uint32_t cTicksHistoriziationInterval;
208 /** The current historization tick (counting up to
209 * cTicksHistoriziationInterval and then resetting). */
210 uint32_t iTickHistorization;
211 /** The current timer interval. This is set to 0 when inactive. */
212 uint32_t cNsInterval;
213 /** The current timer frequency. This is set to 0 when inactive. */
214 uint32_t uTimerHz;
215 /** The current max frequency reported by the EMTs.
216 * This gets historicized and reset by the timer callback. This is
217 * read without holding the spinlock, so needs atomic updating. */
218 uint32_t volatile uDesiredHz;
219 /** Whether the timer was started or not. */
220 bool volatile fStarted;
221 /** Set if we're starting the timer. */
222 bool volatile fStarting;
223 /** The index of the next history entry (mod it). */
224 uint32_t iHzHistory;
225 /** Historicized uDesiredHz values. The array wraps around, new entries
226 * are added at iHzHistory. This is updated approximately every
227 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
228 uint32_t aHzHistory[8];
229 /** Statistics counter for recording the number of interval changes. */
230 uint32_t cChanges;
231 /** Statistics counter for recording the number of timer starts. */
232 uint32_t cStarts;
233 } Ppt;
234#endif /* GVMM_SCHED_WITH_PPT */
235
236} GVMMHOSTCPU;
237/** Pointer to the per host CPU GVMM data. */
238typedef GVMMHOSTCPU *PGVMMHOSTCPU;
239/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
240#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
241/** The interval one history entry should cover (approximately), given in
242 * nanoseconds. */
243#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
244
245
246/**
247 * The GVMM instance data.
 *
 * A single instance is allocated by GVMMR0Init (sized for the number of host
 * CPU entries) and freed by GVMMR0Term.
248 */
249typedef struct GVMM
250{
251 /** Eyecatcher / magic (GVMM_MAGIC while the instance is valid). */
252 uint32_t u32Magic;
253 /** The index of the head of the free handle chain. (0 is nil.) */
254 uint16_t volatile iFreeHead;
255 /** The index of the head of the active handle chain. (0 is nil.) */
256 uint16_t volatile iUsedHead;
257 /** The number of VMs. */
258 uint16_t volatile cVMs;
259 /** Alignment padding. */
260 uint16_t u16Reserved;
261 /** The number of EMTs. */
262 uint32_t volatile cEMTs;
263 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
264 uint32_t volatile cHaltedEMTs;
265 /** Mini lock for restricting early wake-ups to one thread. */
266 bool volatile fDoingEarlyWakeUps;
267 bool afPadding[3]; /**< explicit alignment padding. */
268 /** When the next halted or sleeping EMT will wake up.
269 * This is set to 0 when it needs recalculating and to UINT64_MAX when
270 * there are no halted or sleeping EMTs in the GVMM. */
271 uint64_t uNsNextEmtWakeup;
272 /** The lock used to serialize VM creation, destruction and associated events that
273 * aren't performance critical. Owners may acquire the list lock. */
274 RTCRITSECT CreateDestroyLock;
275 /** The lock used to serialize used list updates and accesses.
276 * This indirectly includes scheduling since the scheduler will have to walk the
277 * used list to examine running VMs. Owners may not acquire any other locks. */
278 RTCRITSECTRW UsedLock;
279 /** The handle array.
280 * The size of this array defines the maximum number of currently running VMs.
281 * The first entry is unused as it represents the NIL handle. */
282 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
283
284 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
285 * The number of EMTs that means we no longer consider ourselves alone on a
286 * CPU/Core.
287 */
288 uint32_t cEMTsMeansCompany;
289 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
290 * The minimum sleep time for when we're alone, in nano seconds.
291 */
292 uint32_t nsMinSleepAlone;
293 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
294 * The minimum sleep time for when we've got company, in nano seconds.
295 */
296 uint32_t nsMinSleepCompany;
297 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
298 * The limit for the first round of early wake-ups, given in nano seconds.
299 */
300 uint32_t nsEarlyWakeUp1;
301 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
302 * The limit for the second round of early wake-ups, given in nano seconds.
303 */
304 uint32_t nsEarlyWakeUp2;
305
306 /** Set if we're doing early wake-ups.
307 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2: it is true only when both
 * are non-zero. */
308 bool volatile fDoEarlyWakeUps;
309
310 /** The number of entries in the host CPU array (aHostCpus). */
311 uint32_t cHostCpus;
312 /** Per host CPU data (variable length).
 * Declared with one entry; GVMMR0Init allocates room for cHostCpus entries. */
313 GVMMHOSTCPU aHostCpus[1];
314} GVMM;
315AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
316AssertCompileMemberAlignment(GVMM, UsedLock, 8);
317AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
318/** Pointer to the GVMM instance data. */
319typedef GVMM *PGVMM;
320
321/** The GVMM::u32Magic value (Charlie Haden). */
322#define GVMM_MAGIC UINT32_C(0x19370806)
323
324
325
326/*********************************************************************************************************************************
327* Global Variables *
328*********************************************************************************************************************************/
329/** Pointer to the GVMM instance data.
 * Set by GVMMR0Init on success and reset to NULL by GVMMR0Term.
330 * (Just my general dislike for global variables.) */
331static PGVMM g_pGVMM = NULL;
332
333/** Macro for obtaining and validating the g_pGVMM pointer.
334 * On failure it will return from the invoking function with the specified return value.
 * The pointer must be valid and its u32Magic member must equal GVMM_MAGIC.
335 *
336 * @param pGVMM The name of the pGVMM variable. It is expanded several
 * times, so pass a plain variable name.
337 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
338 * status codes.
339 */
340#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
341 do { \
342 (pGVMM) = g_pGVMM;\
343 AssertPtrReturn((pGVMM), (rc)); \
344 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
345 } while (0)
346
347/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
348 * On failure it will return from the invoking function.
 * The pointer must be valid and its u32Magic member must equal GVMM_MAGIC.
349 *
350 * @param pGVMM The name of the pGVMM variable. It is expanded several
 * times, so pass a plain variable name.
351 */
352#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
353 do { \
354 (pGVMM) = g_pGVMM;\
355 AssertPtrReturnVoid((pGVMM)); \
356 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
357 } while (0)
358
359
360/*********************************************************************************************************************************
361* Internal Functions *
362*********************************************************************************************************************************/
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
365static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
366static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
367
368#ifdef GVMM_SCHED_WITH_PPT
369static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
370#endif
371
372
373/**
374 * Initializes the GVMM.
375 *
376 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
377 *
378 * @returns VBox status code.
379 */
380GVMMR0DECL(int) GVMMR0Init(void)
381{
382 LogFlow(("GVMMR0Init:\n"));
383
384 /*
385 * Allocate and initialize the instance data.
386 */
387 uint32_t cHostCpus = RTMpGetArraySize();
388 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
389
390 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
391 if (!pGVMM)
392 return VERR_NO_MEMORY;
393 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
394 "GVMM-CreateDestroyLock");
395 if (RT_SUCCESS(rc))
396 {
397 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
398 if (RT_SUCCESS(rc))
399 {
400 pGVMM->u32Magic = GVMM_MAGIC;
401 pGVMM->iUsedHead = 0;
402 pGVMM->iFreeHead = 1;
403
404 /* the nil handle */
405 pGVMM->aHandles[0].iSelf = 0;
406 pGVMM->aHandles[0].iNext = 0;
407
408 /* the tail */
409 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
410 pGVMM->aHandles[i].iSelf = i;
411 pGVMM->aHandles[i].iNext = 0; /* nil */
412
413 /* the rest */
414 while (i-- > 1)
415 {
416 pGVMM->aHandles[i].iSelf = i;
417 pGVMM->aHandles[i].iNext = i + 1;
418 }
419
420 /* The default configuration values. */
421 uint32_t cNsResolution = RTSemEventMultiGetResolution();
422 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
423 if (cNsResolution >= 5*RT_NS_100US)
424 {
425 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
426 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
427 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
428 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
429 }
430 else if (cNsResolution > RT_NS_100US)
431 {
432 pGVMM->nsMinSleepAlone = cNsResolution / 2;
433 pGVMM->nsMinSleepCompany = cNsResolution / 4;
434 pGVMM->nsEarlyWakeUp1 = 0;
435 pGVMM->nsEarlyWakeUp2 = 0;
436 }
437 else
438 {
439 pGVMM->nsMinSleepAlone = 2000;
440 pGVMM->nsMinSleepCompany = 2000;
441 pGVMM->nsEarlyWakeUp1 = 0;
442 pGVMM->nsEarlyWakeUp2 = 0;
443 }
444 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
445
446 /* The host CPU data. */
447 pGVMM->cHostCpus = cHostCpus;
448 uint32_t iCpu = cHostCpus;
449 RTCPUSET PossibleSet;
450 RTMpGetSet(&PossibleSet);
451 while (iCpu-- > 0)
452 {
453 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
454#ifdef GVMM_SCHED_WITH_PPT
455 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
456 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
457 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
458 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
459 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
465 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
467#endif
468
469 if (RTCpuSetIsMember(&PossibleSet, iCpu))
470 {
471 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
472 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
473
474#ifdef GVMM_SCHED_WITH_PPT
475 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
476 50*1000*1000 /* whatever */,
477 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
478 gvmmR0SchedPeriodicPreemptionTimerCallback,
479 &pGVMM->aHostCpus[iCpu]);
480 if (RT_SUCCESS(rc))
481 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
482 if (RT_FAILURE(rc))
483 {
484 while (iCpu < cHostCpus)
485 {
486 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
487 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
488 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
489 iCpu++;
490 }
491 break;
492 }
493#endif
494 }
495 else
496 {
497 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
498 pGVMM->aHostCpus[iCpu].u32Magic = 0;
499 }
500 }
501 if (RT_SUCCESS(rc))
502 {
503 g_pGVMM = pGVMM;
504 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
505 return VINF_SUCCESS;
506 }
507
508 /* bail out. */
509 RTCritSectRwDelete(&pGVMM->UsedLock);
510 }
511 RTCritSectDelete(&pGVMM->CreateDestroyLock);
512 }
513
514 RTMemFree(pGVMM);
515 return rc;
516}
517
518
519/**
520 * Terminates the GVM.
521 *
522 * This is called while owning the loader semaphore (see supdrvLdrFree()).
523 * And unless something is wrong, there should be absolutely no VMs
524 * registered at this point.
525 */
526GVMMR0DECL(void) GVMMR0Term(void)
527{
528 LogFlow(("GVMMR0Term:\n"));
529
530 PGVMM pGVMM = g_pGVMM;
531 g_pGVMM = NULL;
532 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
533 {
534 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
535 return;
536 }
537
538 /*
539 * First of all, stop all active timers.
540 */
541 uint32_t cActiveTimers = 0;
542 uint32_t iCpu = pGVMM->cHostCpus;
543 while (iCpu-- > 0)
544 {
545 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
546#ifdef GVMM_SCHED_WITH_PPT
547 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
548 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
549 cActiveTimers++;
550#endif
551 }
552 if (cActiveTimers)
553 RTThreadSleep(1); /* fudge */
554
555 /*
556 * Invalidate the and free resources.
557 */
558 pGVMM->u32Magic = ~GVMM_MAGIC;
559 RTCritSectRwDelete(&pGVMM->UsedLock);
560 RTCritSectDelete(&pGVMM->CreateDestroyLock);
561
562 pGVMM->iFreeHead = 0;
563 if (pGVMM->iUsedHead)
564 {
565 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
566 pGVMM->iUsedHead = 0;
567 }
568
569#ifdef GVMM_SCHED_WITH_PPT
570 iCpu = pGVMM->cHostCpus;
571 while (iCpu-- > 0)
572 {
573 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
574 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
575 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
576 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
577 }
578#endif
579
580 RTMemFree(pGVMM);
581}
582
583
584/**
585 * A quick hack for setting global config values.
586 *
587 * @returns VBox status code.
588 *
589 * @param pSession The session handle. Used for authentication.
590 * @param pszName The variable name.
591 * @param u64Value The new value.
592 */
593GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
594{
595 /*
596 * Validate input.
597 */
598 PGVMM pGVMM;
599 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
600 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
601 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
602
603 /*
604 * String switch time!
605 */
606 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
607 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
608 int rc = VINF_SUCCESS;
609 pszName += sizeof("/GVMM/") - 1;
610 if (!strcmp(pszName, "cEMTsMeansCompany"))
611 {
612 if (u64Value <= UINT32_MAX)
613 pGVMM->cEMTsMeansCompany = u64Value;
614 else
615 rc = VERR_OUT_OF_RANGE;
616 }
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 {
619 if (u64Value <= RT_NS_100MS)
620 pGVMM->nsMinSleepAlone = u64Value;
621 else
622 rc = VERR_OUT_OF_RANGE;
623 }
624 else if (!strcmp(pszName, "MinSleepCompany"))
625 {
626 if (u64Value <= RT_NS_100MS)
627 pGVMM->nsMinSleepCompany = u64Value;
628 else
629 rc = VERR_OUT_OF_RANGE;
630 }
631 else if (!strcmp(pszName, "EarlyWakeUp1"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 {
635 pGVMM->nsEarlyWakeUp1 = u64Value;
636 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
637 }
638 else
639 rc = VERR_OUT_OF_RANGE;
640 }
641 else if (!strcmp(pszName, "EarlyWakeUp2"))
642 {
643 if (u64Value <= RT_NS_100MS)
644 {
645 pGVMM->nsEarlyWakeUp2 = u64Value;
646 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
647 }
648 else
649 rc = VERR_OUT_OF_RANGE;
650 }
651 else
652 rc = VERR_CFGM_VALUE_NOT_FOUND;
653 return rc;
654}
655
656
657/**
658 * A quick hack for getting global config values.
659 *
660 * @returns VBox status code.
661 *
662 * @param pSession The session handle. Used for authentication.
663 * @param pszName The variable name.
664 * @param pu64Value Where to return the value.
665 */
666GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
667{
668 /*
669 * Validate input.
670 */
671 PGVMM pGVMM;
672 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
673 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
674 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
675 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
676
677 /*
678 * String switch time!
679 */
680 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
681 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
682 int rc = VINF_SUCCESS;
683 pszName += sizeof("/GVMM/") - 1;
684 if (!strcmp(pszName, "cEMTsMeansCompany"))
685 *pu64Value = pGVMM->cEMTsMeansCompany;
686 else if (!strcmp(pszName, "MinSleepAlone"))
687 *pu64Value = pGVMM->nsMinSleepAlone;
688 else if (!strcmp(pszName, "MinSleepCompany"))
689 *pu64Value = pGVMM->nsMinSleepCompany;
690 else if (!strcmp(pszName, "EarlyWakeUp1"))
691 *pu64Value = pGVMM->nsEarlyWakeUp1;
692 else if (!strcmp(pszName, "EarlyWakeUp2"))
693 *pu64Value = pGVMM->nsEarlyWakeUp2;
694 else
695 rc = VERR_CFGM_VALUE_NOT_FOUND;
696 return rc;
697}
698
699
700/**
701 * Acquire the 'used' lock in shared mode.
702 *
703 * This prevents destruction of the VM while we're in ring-0.
704 *
705 * @returns IPRT status code, see RTCritSectRwEnterShared.
706 * @param a_pGVMM The GVMM instance data.
707 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
708 */
709#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
710
711/**
712 * Release the 'used' lock when owning it in shared mode.
713 *
714 * @returns IPRT status code, see RTCritSectRwLeaveShared.
715 * @param a_pGVMM The GVMM instance data.
716 * @sa GVMMR0_USED_SHARED_LOCK
717 */
718#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
719
720/**
721 * Acquire the 'used' lock in exclusive mode.
722 *
723 * Only use this macro when making changes to the used list.
724 *
725 * @returns IPRT status code, see RTCritSectRwEnterExcl.
726 * @param a_pGVMM The GVMM instance data.
727 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
728 */
729#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
730
731/**
732 * Release the 'used' lock when owning it in exclusive mode.
733 *
734 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
735 * @param a_pGVMM The GVMM instance data.
736 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
737 */
738#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
739
740
741/**
742 * Try acquire the 'create & destroy' lock.
743 *
744 * @returns IPRT status code, see RTSemFastMutexRequest.
745 * @param pGVMM The GVMM instance data.
746 */
747DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
748{
749 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
750 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
751 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
752 return rc;
753}
754
755
756/**
757 * Release the 'create & destroy' lock.
758 *
759 * @returns IPRT status code, see RTSemFastMutexRequest.
760 * @param pGVMM The GVMM instance data.
761 */
762DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
763{
764 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
765 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
766 AssertRC(rc);
767 return rc;
768}
769
770
771/**
772 * Request wrapper for the GVMMR0CreateVM API.
773 *
774 * @returns VBox status code.
775 * @param pReq The request buffer.
776 * @param pSession The session handle. The VM will be associated with this.
777 */
778GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
779{
780 /*
781 * Validate the request.
782 */
783 if (!RT_VALID_PTR(pReq))
784 return VERR_INVALID_POINTER;
785 if (pReq->Hdr.cbReq != sizeof(*pReq))
786 return VERR_INVALID_PARAMETER;
787 if (pReq->pSession != pSession)
788 return VERR_INVALID_POINTER;
789
790 /*
791 * Execute it.
792 */
793 PGVM pGVM;
794 pReq->pVMR0 = NULL;
795 pReq->pVMR3 = NIL_RTR3PTR;
796 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
797 if (RT_SUCCESS(rc))
798 {
799 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
800 pReq->pVMR3 = pGVM->pVMR3;
801 }
802 return rc;
803}
804
805
806/**
807 * Allocates the VM structure and registers it with GVM.
808 *
809 * The caller will become the VM owner and there by the EMT.
810 *
811 * @returns VBox status code.
812 * @param pSession The support driver session.
813 * @param cCpus Number of virtual CPUs for the new VM.
814 * @param ppGVM Where to store the pointer to the VM structure.
815 *
816 * @thread EMT.
817 */
818GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
819{
820 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
821 PGVMM pGVMM;
822 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
823
824 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
825 *ppGVM = NULL;
826
827 if ( cCpus == 0
828 || cCpus > VMM_MAX_CPU_COUNT)
829 return VERR_INVALID_PARAMETER;
830
831 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
832 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
833 RTPROCESS ProcId = RTProcSelf();
834 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
835
836 /*
837 * The whole allocation process is protected by the lock.
838 */
839 int rc = gvmmR0CreateDestroyLock(pGVMM);
840 AssertRCReturn(rc, rc);
841
842 /*
843 * Only one VM per session.
844 */
845 if (SUPR0GetSessionVM(pSession) != NULL)
846 {
847 gvmmR0CreateDestroyUnlock(pGVMM);
848 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
849 return VERR_ALREADY_EXISTS;
850 }
851
852 /*
853 * Allocate a handle first so we don't waste resources unnecessarily.
854 */
855 uint16_t iHandle = pGVMM->iFreeHead;
856 if (iHandle)
857 {
858 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
859
860 /* consistency checks, a bit paranoid as always. */
861 if ( !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pGVM = NULL;
880 pHandle->pSession = pSession;
881 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
882 pHandle->ProcId = NIL_RTPROCESS;
883
884 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
885
886 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
887 if (RT_SUCCESS(rc))
888 {
889 /*
890 * Allocate memory for the VM structure (combined VM + GVM).
891 */
892 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
893 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
894 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
895 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
896 if (RT_SUCCESS(rc))
897 {
898 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
899 AssertPtr(pGVM);
900
901 /*
902 * Initialise the structure.
903 */
904 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
905 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
906 pGVM->gvmm.s.VMMemObj = hVMMemObj;
907 rc = GMMR0InitPerVMData(pGVM);
908 int rc2 = PGMR0InitPerVMData(pGVM);
909 DBGFR0InitPerVMData(pGVM);
910 PDMR0InitPerVMData(pGVM);
911 IOMR0InitPerVMData(pGVM);
912 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
913 {
914 /*
915 * Allocate page array.
916 * This currently have to be made available to ring-3, but this is should change eventually.
917 */
918 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
919 if (RT_SUCCESS(rc))
920 {
921 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
922 for (uint32_t iPage = 0; iPage < cPages; iPage++)
923 {
924 paPages[iPage].uReserved = 0;
925 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
926 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
927 }
928
929 /*
930 * Map the page array, VM and VMCPU structures into ring-3.
931 */
932 AssertCompileSizeAlignment(VM, PAGE_SIZE);
933 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
934 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
935 0 /*offSub*/, sizeof(VM));
936 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
937 {
938 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
939 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
940 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
941 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
942 }
943 if (RT_SUCCESS(rc))
944 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
945 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
946 NIL_RTR0PROCESS);
947 if (RT_SUCCESS(rc))
948 {
949 /*
950 * Initialize all the VM pointers.
951 */
952 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
953 AssertPtr((void *)pVMR3);
954
955 for (VMCPUID i = 0; i < cCpus; i++)
956 {
957 pGVM->aCpus[i].pVMR0 = pGVM;
958 pGVM->aCpus[i].pVMR3 = pVMR3;
959 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
960 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
961 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
962 AssertPtr((void *)pGVM->apCpusR3[i]);
963 }
964
965 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
966 AssertPtr((void *)pGVM->paVMPagesR3);
967
968 /*
969 * Complete the handle - take the UsedLock sem just to be careful.
970 */
971 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
972 AssertRC(rc);
973
974 pHandle->pGVM = pGVM;
975 pHandle->hEMT0 = hEMT0;
976 pHandle->ProcId = ProcId;
977 pGVM->pVMR3 = pVMR3;
978 pGVM->pVMR3Unsafe = pVMR3;
979 pGVM->aCpus[0].hEMT = hEMT0;
980 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
981 pGVMM->cEMTs += cCpus;
982
983 /* Associate it with the session and create the context hook for EMT0. */
984 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
985 if (RT_SUCCESS(rc))
986 {
987 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
988 if (RT_SUCCESS(rc))
989 {
990 /*
991 * Done!
992 */
993 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
994
995 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
996 gvmmR0CreateDestroyUnlock(pGVMM);
997
998 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
999
1000 *ppGVM = pGVM;
1001 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1002 return VINF_SUCCESS;
1003 }
1004
1005 SUPR0SetSessionVM(pSession, NULL, NULL);
1006 }
1007 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1008 }
1009
1010 /* Cleanup mappings. */
1011 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1012 {
1013 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1014 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1015 }
1016 for (VMCPUID i = 0; i < cCpus; i++)
1017 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1018 {
1019 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1020 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1021 }
1022 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1023 {
1024 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1025 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1026 }
1027 }
1028 }
1029 else if (RT_SUCCESS(rc))
1030 rc = rc2;
1031 }
1032 }
1033 /* else: The user wasn't permitted to create this VM. */
1034
1035 /*
1036 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1037 * object reference here. A little extra mess because of non-recursive lock.
1038 */
1039 void *pvObj = pHandle->pvObj;
1040 pHandle->pvObj = NULL;
1041 gvmmR0CreateDestroyUnlock(pGVMM);
1042
1043 SUPR0ObjRelease(pvObj, pSession);
1044
1045 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1046 return rc;
1047 }
1048
1049 rc = VERR_NO_MEMORY;
1050 }
1051 else
1052 rc = VERR_GVMM_IPE_1;
1053 }
1054 else
1055 rc = VERR_GVM_TOO_MANY_VMS;
1056
1057 gvmmR0CreateDestroyUnlock(pGVMM);
1058 return rc;
1059}
1060
1061
1062/**
1063 * Initializes the per VM data belonging to GVMM.
1064 *
1065 * @param pGVM Pointer to the global VM structure.
1066 * @param hSelf The handle.
1067 * @param cCpus The CPU count.
1068 * @param pSession The session this VM is associated with.
1069 */
1070static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1071{
1072 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1073 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1074 AssertCompileMemberAlignment(VM, cpum, 64);
1075 AssertCompileMemberAlignment(VM, tm, 64);
1076
1077 /* GVM: */
1078 pGVM->u32Magic = GVM_MAGIC;
1079 pGVM->hSelf = hSelf;
1080 pGVM->cCpus = cCpus;
1081 pGVM->pSession = pSession;
1082 pGVM->pSelf = pGVM;
1083
1084 /* VM: */
1085 pGVM->enmVMState = VMSTATE_CREATING;
1086 pGVM->hSelfUnsafe = hSelf;
1087 pGVM->pSessionUnsafe = pSession;
1088 pGVM->pVMR0ForCall = pGVM;
1089 pGVM->cCpusUnsafe = cCpus;
1090 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1091 pGVM->uStructVersion = 1;
1092 pGVM->cbSelf = sizeof(VM);
1093 pGVM->cbVCpu = sizeof(VMCPU);
1094
1095 /* GVMM: */
1096 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1097 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1098 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1099 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1100 pGVM->gvmm.s.fDoneVMMR0Init = false;
1101 pGVM->gvmm.s.fDoneVMMR0Term = false;
1102
1103 /*
1104 * Per virtual CPU.
1105 */
1106 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1107 {
1108 pGVM->aCpus[i].idCpu = i;
1109 pGVM->aCpus[i].idCpuUnsafe = i;
1110 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1111 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1112 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1113 pGVM->aCpus[i].pGVM = pGVM;
1114 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1115 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1116 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1117 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1118 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1119 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1120 }
1121}
1122
1123
1124/**
1125 * Does the VM initialization.
1126 *
1127 * @returns VBox status code.
1128 * @param pGVM The global (ring-0) VM structure.
1129 */
1130GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1131{
1132 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1133
1134 int rc = VERR_INTERNAL_ERROR_3;
1135 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1136 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1137 {
1138 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1139 {
1140 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1141 if (RT_FAILURE(rc))
1142 {
1143 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1144 break;
1145 }
1146 }
1147 }
1148 else
1149 rc = VERR_WRONG_ORDER;
1150
1151 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1152 return rc;
1153}
1154
1155
1156/**
1157 * Indicates that we're done with the ring-0 initialization
1158 * of the VM.
1159 *
1160 * @param pGVM The global (ring-0) VM structure.
1161 * @thread EMT(0)
1162 */
1163GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1164{
1165 /* Set the indicator. */
1166 pGVM->gvmm.s.fDoneVMMR0Init = true;
1167}
1168
1169
1170/**
1171 * Indicates that we're doing the ring-0 termination of the VM.
1172 *
1173 * @returns true if termination hasn't been done already, false if it has.
1174 * @param pGVM Pointer to the global VM structure. Optional.
1175 * @thread EMT(0) or session cleanup thread.
1176 */
1177GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1178{
1179 /* Validate the VM structure, state and handle. */
1180 AssertPtrReturn(pGVM, false);
1181
1182 /* Set the indicator. */
1183 if (pGVM->gvmm.s.fDoneVMMR0Term)
1184 return false;
1185 pGVM->gvmm.s.fDoneVMMR0Term = true;
1186 return true;
1187}
1188
1189
1190/**
1191 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1192 *
1193 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1194 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1195 * would've been nice if the caller was actually the EMT thread or that we somehow
1196 * could've associated the calling thread with the VM up front.
1197 *
1198 * @returns VBox status code.
1199 * @param pGVM The global (ring-0) VM structure.
1200 *
1201 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1202 */
1203GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1204{
1205 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1206 PGVMM pGVMM;
1207 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1208
1209 /*
1210 * Validate the VM structure, state and caller.
1211 */
1212 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1213 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1214 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1215 VERR_WRONG_ORDER);
1216
1217 uint32_t hGVM = pGVM->hSelf;
1218 ASMCompilerBarrier();
1219 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1220 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1221
1222 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1223 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1224
1225 RTPROCESS ProcId = RTProcSelf();
1226 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1227 AssertReturn( ( pHandle->hEMT0 == hSelf
1228 && pHandle->ProcId == ProcId)
1229 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1230
1231 /*
1232 * Lookup the handle and destroy the object.
1233 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1234 * object, we take some precautions against racing callers just in case...
1235 */
1236 int rc = gvmmR0CreateDestroyLock(pGVMM);
1237 AssertRC(rc);
1238
1239 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1240 if ( pHandle->pGVM == pGVM
1241 && ( ( pHandle->hEMT0 == hSelf
1242 && pHandle->ProcId == ProcId)
1243 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1244 && RT_VALID_PTR(pHandle->pvObj)
1245 && RT_VALID_PTR(pHandle->pSession)
1246 && RT_VALID_PTR(pHandle->pGVM)
1247 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1248 {
1249 /* Check that other EMTs have deregistered. */
1250 uint32_t cNotDeregistered = 0;
1251 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1252 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1253 if (cNotDeregistered == 0)
1254 {
1255 /* Grab the object pointer. */
1256 void *pvObj = pHandle->pvObj;
1257 pHandle->pvObj = NULL;
1258 gvmmR0CreateDestroyUnlock(pGVMM);
1259
1260 SUPR0ObjRelease(pvObj, pHandle->pSession);
1261 }
1262 else
1263 {
1264 gvmmR0CreateDestroyUnlock(pGVMM);
1265 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1266 }
1267 }
1268 else
1269 {
1270 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1271 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1272 gvmmR0CreateDestroyUnlock(pGVMM);
1273 rc = VERR_GVMM_IPE_2;
1274 }
1275
1276 return rc;
1277}
1278
1279
1280/**
1281 * Performs VM cleanup task as part of object destruction.
1282 *
1283 * @param pGVM The GVM pointer.
1284 */
1285static void gvmmR0CleanupVM(PGVM pGVM)
1286{
1287 if ( pGVM->gvmm.s.fDoneVMMR0Init
1288 && !pGVM->gvmm.s.fDoneVMMR0Term)
1289 {
1290 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1291 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1292 {
1293 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1294 VMMR0TermVM(pGVM, NIL_VMCPUID);
1295 }
1296 else
1297 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1298 }
1299
1300 GMMR0CleanupVM(pGVM);
1301#ifdef VBOX_WITH_NEM_R0
1302 NEMR0CleanupVM(pGVM);
1303#endif
1304 PDMR0CleanupVM(pGVM);
1305 IOMR0CleanupVM(pGVM);
1306 DBGFR0CleanupVM(pGVM);
1307 PGMR0CleanupVM(pGVM);
1308
1309 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1310 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1311 {
1312 /** @todo Can we busy wait here for all thread-context hooks to be
1313 * deregistered before releasing (destroying) it? Only until we find a
1314 * solution for not deregistering hooks everytime we're leaving HMR0
1315 * context. */
1316 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1317 }
1318}
1319
1320
1321/**
1322 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1323 *
1324 * pvUser1 is the GVM instance pointer.
1325 * pvUser2 is the handle pointer.
1326 */
1327static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1328{
1329 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1330
1331 NOREF(pvObj);
1332
1333 /*
1334 * Some quick, paranoid, input validation.
1335 */
1336 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1337 AssertPtr(pHandle);
1338 PGVMM pGVMM = (PGVMM)pvUser1;
1339 Assert(pGVMM == g_pGVMM);
1340 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1341 if ( !iHandle
1342 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1343 || iHandle != pHandle->iSelf)
1344 {
1345 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1346 return;
1347 }
1348
1349 int rc = gvmmR0CreateDestroyLock(pGVMM);
1350 AssertRC(rc);
1351 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1352 AssertRC(rc);
1353
1354 /*
1355 * This is a tad slow but a doubly linked list is too much hassle.
1356 */
1357 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1358 {
1359 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1360 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1361 gvmmR0CreateDestroyUnlock(pGVMM);
1362 return;
1363 }
1364
1365 if (pGVMM->iUsedHead == iHandle)
1366 pGVMM->iUsedHead = pHandle->iNext;
1367 else
1368 {
1369 uint16_t iPrev = pGVMM->iUsedHead;
1370 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1371 while (iPrev)
1372 {
1373 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1374 {
1375 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1376 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1377 gvmmR0CreateDestroyUnlock(pGVMM);
1378 return;
1379 }
1380 if (RT_UNLIKELY(c-- <= 0))
1381 {
1382 iPrev = 0;
1383 break;
1384 }
1385
1386 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1387 break;
1388 iPrev = pGVMM->aHandles[iPrev].iNext;
1389 }
1390 if (!iPrev)
1391 {
1392 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1393 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1394 gvmmR0CreateDestroyUnlock(pGVMM);
1395 return;
1396 }
1397
1398 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1399 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1400 }
1401 pHandle->iNext = 0;
1402 pGVMM->cVMs--;
1403
1404 /*
1405 * Do the global cleanup round.
1406 */
1407 PGVM pGVM = pHandle->pGVM;
1408 if ( RT_VALID_PTR(pGVM)
1409 && pGVM->u32Magic == GVM_MAGIC)
1410 {
1411 pGVMM->cEMTs -= pGVM->cCpus;
1412
1413 if (pGVM->pSession)
1414 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1415
1416 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1417
1418 gvmmR0CleanupVM(pGVM);
1419
1420 /*
1421 * Do the GVMM cleanup - must be done last.
1422 */
1423 /* The VM and VM pages mappings/allocations. */
1424 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1425 {
1426 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1427 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1428 }
1429
1430 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1431 {
1432 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1433 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1434 }
1435
1436 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1437 {
1438 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1439 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1440 }
1441
1442 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1443 {
1444 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1445 {
1446 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1447 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1448 }
1449 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1450 {
1451 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1452 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1453 }
1454 }
1455
1456 /* the GVM structure itself. */
1457 pGVM->u32Magic |= UINT32_C(0x80000000);
1458 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1459 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1460 pGVM = NULL;
1461
1462 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1463 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1464 AssertRC(rc);
1465 }
1466 /* else: GVMMR0CreateVM cleanup. */
1467
1468 /*
1469 * Free the handle.
1470 */
1471 pHandle->iNext = pGVMM->iFreeHead;
1472 pGVMM->iFreeHead = iHandle;
1473 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1474 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1475 ASMAtomicWriteNullPtr(&pHandle->pSession);
1476 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1477 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1478
1479 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1480 gvmmR0CreateDestroyUnlock(pGVMM);
1481 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1482}
1483
1484
1485/**
1486 * Registers the calling thread as the EMT of a Virtual CPU.
1487 *
1488 * Note that VCPU 0 is automatically registered during VM creation.
1489 *
1490 * @returns VBox status code
1491 * @param pGVM The global (ring-0) VM structure.
1492 * @param idCpu VCPU id to register the current thread as.
1493 */
1494GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1495{
1496 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1497
1498 /*
1499 * Validate the VM structure, state and handle.
1500 */
1501 PGVMM pGVMM;
1502 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1503 if (RT_SUCCESS(rc))
1504 {
1505 if (idCpu < pGVM->cCpus)
1506 {
1507 /* Check that the EMT isn't already assigned to a thread. */
1508 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1509 {
1510 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1511
1512 /* A thread may only be one EMT. */
1513 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1514 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1515 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1516 if (RT_SUCCESS(rc))
1517 {
1518 /*
1519 * Do the assignment, then try setup the hook. Undo if that fails.
1520 */
1521 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1522
1523 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1524 if (RT_SUCCESS(rc))
1525 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1526 else
1527 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1528 }
1529 }
1530 else
1531 rc = VERR_ACCESS_DENIED;
1532 }
1533 else
1534 rc = VERR_INVALID_CPU_ID;
1535 }
1536 return rc;
1537}
1538
1539
1540/**
1541 * Deregisters the calling thread as the EMT of a Virtual CPU.
1542 *
1543 * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API.
1544 *
1545 * @returns VBox status code
1546 * @param pGVM The global (ring-0) VM structure.
1547 * @param idCpu VCPU id to register the current thread as.
1548 */
1549GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1550{
1551 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1552
1553 /*
1554 * Validate the VM structure, state and handle.
1555 */
1556 PGVMM pGVMM;
1557 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1558 if (RT_SUCCESS(rc))
1559 {
1560 /*
1561 * Take the destruction lock and recheck the handle state to
1562 * prevent racing GVMMR0DestroyVM.
1563 */
1564 gvmmR0CreateDestroyLock(pGVMM);
1565 uint32_t hSelf = pGVM->hSelf;
1566 ASMCompilerBarrier();
1567 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1568 && pGVMM->aHandles[hSelf].pvObj != NULL
1569 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1570 {
1571 /*
1572 * Do per-EMT cleanups.
1573 */
1574 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1575
1576 /*
1577 * Invalidate hEMT. We don't use NIL here as that would allow
1578 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1579 */
1580 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1581 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1582 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1583 }
1584
1585 gvmmR0CreateDestroyUnlock(pGVMM);
1586 }
1587 return rc;
1588}
1589
1590
1591/**
1592 * Lookup a GVM structure by its handle.
1593 *
1594 * @returns The GVM pointer on success, NULL on failure.
1595 * @param hGVM The global VM handle. Asserts on bad handle.
1596 */
1597GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1598{
1599 PGVMM pGVMM;
1600 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1601
1602 /*
1603 * Validate.
1604 */
1605 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1606 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1607
1608 /*
1609 * Look it up.
1610 */
1611 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1612 AssertPtrReturn(pHandle->pvObj, NULL);
1613 PGVM pGVM = pHandle->pGVM;
1614 AssertPtrReturn(pGVM, NULL);
1615
1616 return pGVM;
1617}
1618
1619
1620/**
1621 * Check that the given GVM and VM structures match up.
1622 *
1623 * The calling thread must be in the same process as the VM. All current lookups
1624 * are by threads inside the same process, so this will not be an issue.
1625 *
1626 * @returns VBox status code.
1627 * @param pGVM The global (ring-0) VM structure.
1628 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1629 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1630 * shared mode when requested.
1631 *
1632 * Be very careful if not taking the lock as it's
1633 * possible that the VM will disappear then!
1634 *
1635 * @remark This will not assert on an invalid pGVM but try return silently.
1636 */
1637static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1638{
1639 /*
1640 * Check the pointers.
1641 */
1642 int rc;
1643 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1644 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1645 {
1646 /*
1647 * Get the pGVMM instance and check the VM handle.
1648 */
1649 PGVMM pGVMM;
1650 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1651
1652 uint16_t hGVM = pGVM->hSelf;
1653 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1654 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1655 {
1656 RTPROCESS const pidSelf = RTProcSelf();
1657 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1658 if (fTakeUsedLock)
1659 {
1660 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1661 AssertRCReturn(rc, rc);
1662 }
1663
1664 if (RT_LIKELY( pHandle->pGVM == pGVM
1665 && pHandle->ProcId == pidSelf
1666 && RT_VALID_PTR(pHandle->pvObj)))
1667 {
1668 /*
1669 * Some more VM data consistency checks.
1670 */
1671 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1672 && pGVM->hSelfUnsafe == hGVM
1673 && pGVM->pSelf == pGVM))
1674 {
1675 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1676 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1677 {
1678 *ppGVMM = pGVMM;
1679 return VINF_SUCCESS;
1680 }
1681 rc = VERR_INCONSISTENT_VM_HANDLE;
1682 }
1683 else
1684 rc = VERR_INCONSISTENT_VM_HANDLE;
1685 }
1686 else
1687 rc = VERR_INVALID_VM_HANDLE;
1688
1689 if (fTakeUsedLock)
1690 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1691 }
1692 else
1693 rc = VERR_INVALID_VM_HANDLE;
1694 }
1695 else
1696 rc = VERR_INVALID_POINTER;
1697 return rc;
1698}
1699
1700
1701/**
1702 * Validates a GVM/VM pair.
1703 *
1704 * @returns VBox status code.
1705 * @param pGVM The global (ring-0) VM structure.
1706 */
1707GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1708{
1709 PGVMM pGVMM;
1710 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1711}
1712
1713
1714/**
1715 * Check that the given GVM and VM structures match up.
1716 *
1717 * The calling thread must be in the same process as the VM. All current lookups
1718 * are by threads inside the same process, so this will not be an issue.
1719 *
1720 * @returns VBox status code.
1721 * @param pGVM The global (ring-0) VM structure.
1722 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1723 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1724 * @thread EMT
1725 *
1726 * @remarks This will assert in all failure paths.
1727 */
1728static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1729{
1730 /*
1731 * Check the pointers.
1732 */
1733 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1734 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1735
1736 /*
1737 * Get the pGVMM instance and check the VM handle.
1738 */
1739 PGVMM pGVMM;
1740 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1741
1742 uint16_t hGVM = pGVM->hSelf;
1743 ASMCompilerBarrier();
1744 AssertReturn( hGVM != NIL_GVM_HANDLE
1745 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1746
1747 RTPROCESS const pidSelf = RTProcSelf();
1748 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1749 AssertReturn( pHandle->pGVM == pGVM
1750 && pHandle->ProcId == pidSelf
1751 && RT_VALID_PTR(pHandle->pvObj),
1752 VERR_INVALID_HANDLE);
1753
1754 /*
1755 * Check the EMT claim.
1756 */
1757 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1758 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1759 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1760
1761 /*
1762 * Some more VM data consistency checks.
1763 */
1764 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1765 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1766 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1767 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1768
1769 *ppGVMM = pGVMM;
1770 return VINF_SUCCESS;
1771}
1772
1773
1774/**
1775 * Validates a GVM/EMT pair.
1776 *
1777 * @returns VBox status code.
1778 * @param pGVM The global (ring-0) VM structure.
1779 * @param idCpu The Virtual CPU ID of the calling EMT.
1780 * @thread EMT(idCpu)
1781 */
1782GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1783{
1784 PGVMM pGVMM;
1785 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1786}
1787
1788
1789/**
1790 * Looks up the VM belonging to the specified EMT thread.
1791 *
1792 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1793 * unnecessary kernel panics when the EMT thread hits an assertion. The
1794 * call may or not be an EMT thread.
1795 *
1796 * @returns Pointer to the VM on success, NULL on failure.
1797 * @param hEMT The native thread handle of the EMT.
1798 * NIL_RTNATIVETHREAD means the current thread
1799 */
1800GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1801{
1802 /*
1803 * No Assertions here as we're usually called in a AssertMsgN or
1804 * RTAssert* context.
1805 */
1806 PGVMM pGVMM = g_pGVMM;
1807 if ( !RT_VALID_PTR(pGVMM)
1808 || pGVMM->u32Magic != GVMM_MAGIC)
1809 return NULL;
1810
1811 if (hEMT == NIL_RTNATIVETHREAD)
1812 hEMT = RTThreadNativeSelf();
1813 RTPROCESS ProcId = RTProcSelf();
1814
1815 /*
1816 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1817 */
1818/** @todo introduce some pid hash table here, please. */
1819 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1820 {
1821 if ( pGVMM->aHandles[i].iSelf == i
1822 && pGVMM->aHandles[i].ProcId == ProcId
1823 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1824 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1825 {
1826 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1827 return pGVMM->aHandles[i].pGVM;
1828
1829 /* This is fearly safe with the current process per VM approach. */
1830 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1831 VMCPUID const cCpus = pGVM->cCpus;
1832 ASMCompilerBarrier();
1833 if ( cCpus < 1
1834 || cCpus > VMM_MAX_CPU_COUNT)
1835 continue;
1836 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1837 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1838 return pGVMM->aHandles[i].pGVM;
1839 }
1840 }
1841 return NULL;
1842}
1843
1844
1845/**
1846 * Looks up the GVMCPU belonging to the specified EMT thread.
1847 *
1848 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1849 * unnecessary kernel panics when the EMT thread hits an assertion. The
1850 * call may or not be an EMT thread.
1851 *
1852 * @returns Pointer to the VM on success, NULL on failure.
1853 * @param hEMT The native thread handle of the EMT.
1854 * NIL_RTNATIVETHREAD means the current thread
1855 */
1856GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1857{
1858 /*
1859 * No Assertions here as we're usually called in a AssertMsgN,
1860 * RTAssert*, Log and LogRel contexts.
1861 */
1862 PGVMM pGVMM = g_pGVMM;
1863 if ( !RT_VALID_PTR(pGVMM)
1864 || pGVMM->u32Magic != GVMM_MAGIC)
1865 return NULL;
1866
1867 if (hEMT == NIL_RTNATIVETHREAD)
1868 hEMT = RTThreadNativeSelf();
1869 RTPROCESS ProcId = RTProcSelf();
1870
1871 /*
1872 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1873 */
1874/** @todo introduce some pid hash table here, please. */
1875 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1876 {
1877 if ( pGVMM->aHandles[i].iSelf == i
1878 && pGVMM->aHandles[i].ProcId == ProcId
1879 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1880 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1881 {
1882 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1883 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1884 return &pGVM->aCpus[0];
1885
1886 /* This is fearly safe with the current process per VM approach. */
1887 VMCPUID const cCpus = pGVM->cCpus;
1888 ASMCompilerBarrier();
1889 ASMCompilerBarrier();
1890 if ( cCpus < 1
1891 || cCpus > VMM_MAX_CPU_COUNT)
1892 continue;
1893 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1894 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1895 return &pGVM->aCpus[idCpu];
1896 }
1897 }
1898 return NULL;
1899}
1900
1901
1902/**
1903 * This is will wake up expired and soon-to-be expired VMs.
1904 *
1905 * @returns Number of VMs that has been woken up.
1906 * @param pGVMM Pointer to the GVMM instance data.
1907 * @param u64Now The current time.
1908 */
1909static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1910{
1911 /*
1912 * Skip this if we've got disabled because of high resolution wakeups or by
1913 * the user.
1914 */
1915 if (!pGVMM->fDoEarlyWakeUps)
1916 return 0;
1917
1918/** @todo Rewrite this algorithm. See performance defect XYZ. */
1919
1920 /*
1921 * A cheap optimization to stop wasting so much time here on big setups.
1922 */
1923 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1924 if ( pGVMM->cHaltedEMTs == 0
1925 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1926 return 0;
1927
1928 /*
1929 * Only one thread doing this at a time.
1930 */
1931 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1932 return 0;
1933
1934 /*
1935 * The first pass will wake up VMs which have actually expired
1936 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1937 */
1938 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1939 uint64_t u64Min = UINT64_MAX;
1940 unsigned cWoken = 0;
1941 unsigned cHalted = 0;
1942 unsigned cTodo2nd = 0;
1943 unsigned cTodo3rd = 0;
1944 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1945 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1946 i = pGVMM->aHandles[i].iNext)
1947 {
1948 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1949 if ( RT_VALID_PTR(pCurGVM)
1950 && pCurGVM->u32Magic == GVM_MAGIC)
1951 {
1952 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1953 {
1954 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1955 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1956 if (u64)
1957 {
1958 if (u64 <= u64Now)
1959 {
1960 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1961 {
1962 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1963 AssertRC(rc);
1964 cWoken++;
1965 }
1966 }
1967 else
1968 {
1969 cHalted++;
1970 if (u64 <= uNsEarlyWakeUp1)
1971 cTodo2nd++;
1972 else if (u64 <= uNsEarlyWakeUp2)
1973 cTodo3rd++;
1974 else if (u64 < u64Min)
1975 u64 = u64Min;
1976 }
1977 }
1978 }
1979 }
1980 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1981 }
1982
1983 if (cTodo2nd)
1984 {
1985 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1986 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1987 i = pGVMM->aHandles[i].iNext)
1988 {
1989 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1990 if ( RT_VALID_PTR(pCurGVM)
1991 && pCurGVM->u32Magic == GVM_MAGIC)
1992 {
1993 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1994 {
1995 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1996 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1997 if ( u64
1998 && u64 <= uNsEarlyWakeUp1)
1999 {
2000 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2001 {
2002 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2003 AssertRC(rc);
2004 cWoken++;
2005 }
2006 }
2007 }
2008 }
2009 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2010 }
2011 }
2012
2013 if (cTodo3rd)
2014 {
2015 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2016 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2017 i = pGVMM->aHandles[i].iNext)
2018 {
2019 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2020 if ( RT_VALID_PTR(pCurGVM)
2021 && pCurGVM->u32Magic == GVM_MAGIC)
2022 {
2023 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2024 {
2025 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2026 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2027 if ( u64
2028 && u64 <= uNsEarlyWakeUp2)
2029 {
2030 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2031 {
2032 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2033 AssertRC(rc);
2034 cWoken++;
2035 }
2036 }
2037 }
2038 }
2039 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2040 }
2041 }
2042
2043 /*
2044 * Set the minimum value.
2045 */
2046 pGVMM->uNsNextEmtWakeup = u64Min;
2047
2048 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2049 return cWoken;
2050}
2051
2052
2053/**
2054 * Halt the EMT thread.
2055 *
2056 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2057 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2058 * @param pGVM The global (ring-0) VM structure.
2059 * @param pGVCpu The global (ring-0) CPU structure of the calling
2060 * EMT.
2061 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2062 * @thread EMT(pGVCpu).
2063 */
2064GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2065{
2066 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2067 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2068 GVMM_CHECK_SMAP_SETUP();
2069 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2070
2071 PGVMM pGVMM;
2072 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2073
2074 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2075 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2076
2077 /*
2078 * If we're doing early wake-ups, we must take the UsedList lock before we
2079 * start querying the current time.
2080 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2081 */
2082 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2083 if (fDoEarlyWakeUps)
2084 {
2085 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2086 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2087 }
2088
2089 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2090
2091 /* GIP hack: We might are frequently sleeping for short intervals where the
2092 difference between GIP and system time matters on systems with high resolution
2093 system time. So, convert the input from GIP to System time in that case. */
2094 Assert(ASMGetFlags() & X86_EFL_IF);
2095 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2096 const uint64_t u64NowGip = RTTimeNanoTS();
2097 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2098
2099 if (fDoEarlyWakeUps)
2100 {
2101 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2102 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2103 }
2104
2105 /*
2106 * Go to sleep if we must...
2107 * Cap the sleep time to 1 second to be on the safe side.
2108 */
2109 int rc;
2110 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2111 if ( u64NowGip < u64ExpireGipTime
2112 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2113 ? pGVMM->nsMinSleepCompany
2114 : pGVMM->nsMinSleepAlone))
2115 {
2116 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2117 if (cNsInterval > RT_NS_1SEC)
2118 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2119 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2120 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2121 if (fDoEarlyWakeUps)
2122 {
2123 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2124 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2125 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2126 }
2127 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2128
2129 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2130 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2131 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2132 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2133
2134 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2135 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2136
2137 /* Reset the semaphore to try prevent a few false wake-ups. */
2138 if (rc == VINF_SUCCESS)
2139 {
2140 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2141 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2142 }
2143 else if (rc == VERR_TIMEOUT)
2144 {
2145 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2146 rc = VINF_SUCCESS;
2147 }
2148 }
2149 else
2150 {
2151 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2152 if (fDoEarlyWakeUps)
2153 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2154 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2155 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2156 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2157 rc = VINF_SUCCESS;
2158 }
2159
2160 return rc;
2161}
2162
2163
2164/**
2165 * Halt the EMT thread.
2166 *
2167 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2168 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2169 * @param pGVM The global (ring-0) VM structure.
2170 * @param idCpu The Virtual CPU ID of the calling EMT.
2171 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2172 * @thread EMT(idCpu).
2173 */
2174GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2175{
2176 GVMM_CHECK_SMAP_SETUP();
2177 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2178 PGVMM pGVMM;
2179 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2180 if (RT_SUCCESS(rc))
2181 {
2182 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2183 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2184 }
2185 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2186 return rc;
2187}
2188
2189
2190
2191/**
2192 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2193 * the a sleeping EMT.
2194 *
2195 * @retval VINF_SUCCESS if successfully woken up.
2196 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2197 *
2198 * @param pGVM The global (ring-0) VM structure.
2199 * @param pGVCpu The global (ring-0) VCPU structure.
2200 */
2201DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2202{
2203 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2204
2205 /*
2206 * Signal the semaphore regardless of whether it's current blocked on it.
2207 *
2208 * The reason for this is that there is absolutely no way we can be 100%
2209 * certain that it isn't *about* go to go to sleep on it and just got
2210 * delayed a bit en route. So, we will always signal the semaphore when
2211 * the it is flagged as halted in the VMM.
2212 */
2213/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2214 int rc;
2215 if (pGVCpu->gvmm.s.u64HaltExpire)
2216 {
2217 rc = VINF_SUCCESS;
2218 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2219 }
2220 else
2221 {
2222 rc = VINF_GVM_NOT_BLOCKED;
2223 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2224 }
2225
2226 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2227 AssertRC(rc2);
2228
2229 return rc;
2230}
2231
2232
2233/**
2234 * Wakes up the halted EMT thread so it can service a pending request.
2235 *
2236 * @returns VBox status code.
2237 * @retval VINF_SUCCESS if successfully woken up.
2238 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2239 *
2240 * @param pGVM The global (ring-0) VM structure.
2241 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2242 * @param fTakeUsedLock Take the used lock or not
2243 * @thread Any but EMT(idCpu).
2244 */
2245GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2246{
2247 GVMM_CHECK_SMAP_SETUP();
2248 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2249
2250 /*
2251 * Validate input and take the UsedLock.
2252 */
2253 PGVMM pGVMM;
2254 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2255 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2256 if (RT_SUCCESS(rc))
2257 {
2258 if (idCpu < pGVM->cCpus)
2259 {
2260 /*
2261 * Do the actual job.
2262 */
2263 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2264 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2265
2266 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2267 {
2268 /*
2269 * While we're here, do a round of scheduling.
2270 */
2271 Assert(ASMGetFlags() & X86_EFL_IF);
2272 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2273 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2274 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2275 }
2276 }
2277 else
2278 rc = VERR_INVALID_CPU_ID;
2279
2280 if (fTakeUsedLock)
2281 {
2282 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2283 AssertRC(rc2);
2284 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2285 }
2286 }
2287
2288 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2289 return rc;
2290}
2291
2292
2293/**
2294 * Wakes up the halted EMT thread so it can service a pending request.
2295 *
2296 * @returns VBox status code.
2297 * @retval VINF_SUCCESS if successfully woken up.
2298 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2299 *
2300 * @param pGVM The global (ring-0) VM structure.
2301 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2302 * @thread Any but EMT(idCpu).
2303 */
2304GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2305{
2306 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2307}
2308
2309
2310/**
2311 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2312 * parameter and no used locking.
2313 *
2314 * @returns VBox status code.
2315 * @retval VINF_SUCCESS if successfully woken up.
2316 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2317 *
2318 * @param pGVM The global (ring-0) VM structure.
2319 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2320 * @thread Any but EMT(idCpu).
2321 * @deprecated Don't use in new code if possible! Use the GVM variant.
2322 */
2323GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2324{
2325 GVMM_CHECK_SMAP_SETUP();
2326 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2327 PGVMM pGVMM;
2328 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2329 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2330 if (RT_SUCCESS(rc))
2331 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2332 return rc;
2333}
2334
2335
2336/**
2337 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2338 * the Virtual CPU if it's still busy executing guest code.
2339 *
2340 * @returns VBox status code.
2341 * @retval VINF_SUCCESS if poked successfully.
2342 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2343 *
2344 * @param pGVM The global (ring-0) VM structure.
2345 * @param pVCpu The cross context virtual CPU structure.
2346 */
2347DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2348{
2349 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2350
2351 RTCPUID idHostCpu = pVCpu->idHostCpu;
2352 if ( idHostCpu == NIL_RTCPUID
2353 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2354 {
2355 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2356 return VINF_GVM_NOT_BUSY_IN_GC;
2357 }
2358
2359 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2360 RTMpPokeCpu(idHostCpu);
2361 return VINF_SUCCESS;
2362}
2363
2364
2365/**
2366 * Pokes an EMT if it's still busy running guest code.
2367 *
2368 * @returns VBox status code.
2369 * @retval VINF_SUCCESS if poked successfully.
2370 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2371 *
2372 * @param pGVM The global (ring-0) VM structure.
2373 * @param idCpu The ID of the virtual CPU to poke.
2374 * @param fTakeUsedLock Take the used lock or not
2375 */
2376GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2377{
2378 /*
2379 * Validate input and take the UsedLock.
2380 */
2381 PGVMM pGVMM;
2382 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2383 if (RT_SUCCESS(rc))
2384 {
2385 if (idCpu < pGVM->cCpus)
2386 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2387 else
2388 rc = VERR_INVALID_CPU_ID;
2389
2390 if (fTakeUsedLock)
2391 {
2392 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2393 AssertRC(rc2);
2394 }
2395 }
2396
2397 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2398 return rc;
2399}
2400
2401
2402/**
2403 * Pokes an EMT if it's still busy running guest code.
2404 *
2405 * @returns VBox status code.
2406 * @retval VINF_SUCCESS if poked successfully.
2407 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2408 *
2409 * @param pGVM The global (ring-0) VM structure.
2410 * @param idCpu The ID of the virtual CPU to poke.
2411 */
2412GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2413{
2414 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2415}
2416
2417
2418/**
2419 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2420 * used locking.
2421 *
2422 * @returns VBox status code.
2423 * @retval VINF_SUCCESS if poked successfully.
2424 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2425 *
2426 * @param pGVM The global (ring-0) VM structure.
2427 * @param idCpu The ID of the virtual CPU to poke.
2428 *
2429 * @deprecated Don't use in new code if possible! Use the GVM variant.
2430 */
2431GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2432{
2433 PGVMM pGVMM;
2434 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2435 if (RT_SUCCESS(rc))
2436 {
2437 if (idCpu < pGVM->cCpus)
2438 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2439 else
2440 rc = VERR_INVALID_CPU_ID;
2441 }
2442 return rc;
2443}
2444
2445
2446/**
2447 * Wakes up a set of halted EMT threads so they can service pending request.
2448 *
2449 * @returns VBox status code, no informational stuff.
2450 *
2451 * @param pGVM The global (ring-0) VM structure.
2452 * @param pSleepSet The set of sleepers to wake up.
2453 * @param pPokeSet The set of CPUs to poke.
2454 */
2455GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2456{
2457 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2458 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2459 GVMM_CHECK_SMAP_SETUP();
2460 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2461 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2462
2463 /*
2464 * Validate input and take the UsedLock.
2465 */
2466 PGVMM pGVMM;
2467 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2468 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2469 if (RT_SUCCESS(rc))
2470 {
2471 rc = VINF_SUCCESS;
2472 VMCPUID idCpu = pGVM->cCpus;
2473 while (idCpu-- > 0)
2474 {
2475 /* Don't try poke or wake up ourselves. */
2476 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2477 continue;
2478
2479 /* just ignore errors for now. */
2480 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2481 {
2482 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2483 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2484 }
2485 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2486 {
2487 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2488 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2489 }
2490 }
2491
2492 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2493 AssertRC(rc2);
2494 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2495 }
2496
2497 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2498 return rc;
2499}
2500
2501
2502/**
2503 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2504 *
2505 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2506 * @param pGVM The global (ring-0) VM structure.
2507 * @param pReq Pointer to the request packet.
2508 */
2509GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2510{
2511 /*
2512 * Validate input and pass it on.
2513 */
2514 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2515 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2516
2517 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2518}
2519
2520
2521
2522/**
2523 * Poll the schedule to see if someone else should get a chance to run.
2524 *
2525 * This is a bit hackish and will not work too well if the machine is
2526 * under heavy load from non-VM processes.
2527 *
2528 * @returns VINF_SUCCESS if not yielded.
2529 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2530 * @param pGVM The global (ring-0) VM structure.
2531 * @param idCpu The Virtual CPU ID of the calling EMT.
2532 * @param fYield Whether to yield or not.
2533 * This is for when we're spinning in the halt loop.
2534 * @thread EMT(idCpu).
2535 */
2536GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2537{
2538 /*
2539 * Validate input.
2540 */
2541 PGVMM pGVMM;
2542 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2543 if (RT_SUCCESS(rc))
2544 {
2545 /*
2546 * We currently only implement helping doing wakeups (fYield = false), so don't
2547 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2548 */
2549 if (!fYield && pGVMM->fDoEarlyWakeUps)
2550 {
2551 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2552 pGVM->gvmm.s.StatsSched.cPollCalls++;
2553
2554 Assert(ASMGetFlags() & X86_EFL_IF);
2555 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2556
2557 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2558
2559 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2560 }
2561 /*
2562 * Not quite sure what we could do here...
2563 */
2564 else if (fYield)
2565 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2566 else
2567 rc = VINF_SUCCESS;
2568 }
2569
2570 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2571 return rc;
2572}
2573
2574
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Once per historization interval the desired frequency is recorded in the
 * per-CPU history and the timer is reprogrammed to the history maximum, or
 * stopped when that maximum is zero.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  Pointer to the per cpu structure.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /* Bail out if the per-CPU structure is no longer valid (termination). */
    if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    RTSpinlockAcquire(pCpu->Ppt.hSpinlock);

    /* Nothing more to do until a full historization interval has elapsed. */
    if (++pCpu->Ppt.iTickHistorization < pCpu->Ppt.cTicksHistoriziationInterval)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /* Push the current desired frequency into the history and start afresh. */
    uint32_t const idxHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
    pCpu->Ppt.aHzHistory[idxHistory] = pCpu->Ppt.uDesiredHz;
    pCpu->Ppt.iTickHistorization     = 0;
    pCpu->Ppt.uDesiredHz             = 0;

    /* Find the highest frequency in the history. */
    uint32_t uMaxHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pCpu->Ppt.aHzHistory); idx++)
    {
        uint32_t const uEntryHz = pCpu->Ppt.aHzHistory[idx];
        if (uEntryHz > uMaxHz)
            uMaxHz = uEntryHz;
    }

    /* The timer already runs at the right frequency: done. */
    if (uMaxHz == pCpu->Ppt.uTimerHz)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /* Nobody wants the timer any more: stop it. */
    if (!uMaxHz)
    {
        pCpu->Ppt.fStarted    = false;
        pCpu->Ppt.uTimerHz    = 0;
        pCpu->Ppt.cNsInterval = 0;
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
        RTTimerStop(pTimer);
        return;
    }

    /* Otherwise reprogram the timer to the new frequency. */
    pCpu->Ppt.cChanges++;
    pCpu->Ppt.iTickHistorization = 0;
    pCpu->Ppt.uTimerHz           = uMaxHz;
    uint32_t const cNsNewInterval = RT_NS_1SEC / uMaxHz;
    pCpu->Ppt.cNsInterval        = cNsNewInterval;
    pCpu->Ppt.cTicksHistoriziationInterval = cNsNewInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                           ?   (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                             / cNsNewInterval
                                           : 1;
    RTSpinlockRelease(pCpu->Ppt.hSpinlock);

    /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
    RTTimerChangeInterval(pTimer, cNsNewInterval);
}
#endif /* GVMM_SCHED_WITH_PPT */
2657
2658
2659/**
2660 * Updates the periodic preemption timer for the calling CPU.
2661 *
2662 * The caller must have disabled preemption!
2663 * The caller must check that the host can do high resolution timers.
2664 *
2665 * @param pGVM The global (ring-0) VM structure.
2666 * @param idHostCpu The current host CPU id.
2667 * @param uHz The desired frequency.
2668 */
2669GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2670{
2671 NOREF(pGVM);
2672#ifdef GVMM_SCHED_WITH_PPT
2673 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2674 Assert(RTTimerCanDoHighResolution());
2675
2676 /*
2677 * Resolve the per CPU data.
2678 */
2679 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2680 PGVMM pGVMM = g_pGVMM;
2681 if ( !RT_VALID_PTR(pGVMM)
2682 || pGVMM->u32Magic != GVMM_MAGIC)
2683 return;
2684 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2685 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2686 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2687 && pCpu->idCpu == idHostCpu,
2688 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2689
2690 /*
2691 * Check whether we need to do anything about the timer.
2692 * We have to be a little bit careful since we might be race the timer
2693 * callback here.
2694 */
2695 if (uHz > 16384)
2696 uHz = 16384; /** @todo add a query method for this! */
2697 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2698 && uHz >= pCpu->Ppt.uMinHz
2699 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2700 {
2701 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2702
2703 pCpu->Ppt.uDesiredHz = uHz;
2704 uint32_t cNsInterval = 0;
2705 if (!pCpu->Ppt.fStarted)
2706 {
2707 pCpu->Ppt.cStarts++;
2708 pCpu->Ppt.fStarted = true;
2709 pCpu->Ppt.fStarting = true;
2710 pCpu->Ppt.iTickHistorization = 0;
2711 pCpu->Ppt.uTimerHz = uHz;
2712 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2713 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2714 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2715 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2716 / cNsInterval;
2717 else
2718 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2719 }
2720
2721 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2722
2723 if (cNsInterval)
2724 {
2725 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2726 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2727 AssertRC(rc);
2728
2729 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2730 if (RT_FAILURE(rc))
2731 pCpu->Ppt.fStarted = false;
2732 pCpu->Ppt.fStarting = false;
2733 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2734 }
2735 }
2736#else /* !GVMM_SCHED_WITH_PPT */
2737 NOREF(idHostCpu); NOREF(uHz);
2738#endif /* !GVMM_SCHED_WITH_PPT */
2739}
2740
2741
2742/**
2743 * Calls @a pfnCallback for each VM in the system.
2744 *
2745 * This will enumerate the VMs while holding the global VM used list lock in
2746 * shared mode. So, only suitable for simple work. If more expensive work
2747 * needs doing, a different approach must be taken as using this API would
2748 * otherwise block VM creation and destruction.
2749 *
2750 * @returns VBox status code.
2751 * @param pfnCallback The callback function.
2752 * @param pvUser User argument to the callback.
2753 */
2754GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2755{
2756 PGVMM pGVMM;
2757 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2758
2759 int rc = VINF_SUCCESS;
2760 GVMMR0_USED_SHARED_LOCK(pGVMM);
2761 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2762 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2763 i = pGVMM->aHandles[i].iNext, cLoops++)
2764 {
2765 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2766 if ( RT_VALID_PTR(pGVM)
2767 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2768 && pGVM->u32Magic == GVM_MAGIC)
2769 {
2770 rc = pfnCallback(pGVM, pvUser);
2771 if (rc != VINF_SUCCESS)
2772 break;
2773 }
2774
2775 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2776 }
2777 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2778 return rc;
2779}
2780
2781
2782/**
2783 * Retrieves the GVMM statistics visible to the caller.
2784 *
2785 * @returns VBox status code.
2786 *
2787 * @param pStats Where to put the statistics.
2788 * @param pSession The current session.
2789 * @param pGVM The GVM to obtain statistics for. Optional.
2790 */
2791GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2792{
2793 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2794
2795 /*
2796 * Validate input.
2797 */
2798 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2799 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2800 pStats->cVMs = 0; /* (crash before taking the sem...) */
2801
2802 /*
2803 * Take the lock and get the VM statistics.
2804 */
2805 PGVMM pGVMM;
2806 if (pGVM)
2807 {
2808 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2809 if (RT_FAILURE(rc))
2810 return rc;
2811 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2812 }
2813 else
2814 {
2815 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2816 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2817
2818 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2819 AssertRCReturn(rc, rc);
2820 }
2821
2822 /*
2823 * Enumerate the VMs and add the ones visible to the statistics.
2824 */
2825 pStats->cVMs = 0;
2826 pStats->cEMTs = 0;
2827 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2828
2829 for (unsigned i = pGVMM->iUsedHead;
2830 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2831 i = pGVMM->aHandles[i].iNext)
2832 {
2833 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2834 void *pvObj = pGVMM->aHandles[i].pvObj;
2835 if ( RT_VALID_PTR(pvObj)
2836 && RT_VALID_PTR(pOtherGVM)
2837 && pOtherGVM->u32Magic == GVM_MAGIC
2838 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2839 {
2840 pStats->cVMs++;
2841 pStats->cEMTs += pOtherGVM->cCpus;
2842
2843 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2844 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2845 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2846 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2847 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2848
2849 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2850 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2851 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2852
2853 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2854 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2855
2856 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2857 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2858 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2859 }
2860 }
2861
2862 /*
2863 * Copy out the per host CPU statistics.
2864 */
2865 uint32_t iDstCpu = 0;
2866 uint32_t cSrcCpus = pGVMM->cHostCpus;
2867 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2868 {
2869 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2870 {
2871 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2872 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2873#ifdef GVMM_SCHED_WITH_PPT
2874 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2875 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2876 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2877 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2878#else
2879 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2880 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2881 pStats->aHostCpus[iDstCpu].cChanges = 0;
2882 pStats->aHostCpus[iDstCpu].cStarts = 0;
2883#endif
2884 iDstCpu++;
2885 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2886 break;
2887 }
2888 }
2889 pStats->cHostCpus = iDstCpu;
2890
2891 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2892
2893 return VINF_SUCCESS;
2894}
2895
2896
2897/**
2898 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2899 *
2900 * @returns see GVMMR0QueryStatistics.
2901 * @param pGVM The global (ring-0) VM structure. Optional.
2902 * @param pReq Pointer to the request packet.
2903 * @param pSession The current session.
2904 */
2905GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2906{
2907 /*
2908 * Validate input and pass it on.
2909 */
2910 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2911 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2912 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2913
2914 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2915}
2916
2917
2918/**
2919 * Resets the specified GVMM statistics.
2920 *
2921 * @returns VBox status code.
2922 *
2923 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2924 * @param pSession The current session.
2925 * @param pGVM The GVM to reset statistics for. Optional.
2926 */
2927GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2928{
2929 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2930
2931 /*
2932 * Validate input.
2933 */
2934 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2935 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2936
2937 /*
2938 * Take the lock and get the VM statistics.
2939 */
2940 PGVMM pGVMM;
2941 if (pGVM)
2942 {
2943 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2944 if (RT_FAILURE(rc))
2945 return rc;
2946# define MAYBE_RESET_FIELD(field) \
2947 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2948 MAYBE_RESET_FIELD(cHaltCalls);
2949 MAYBE_RESET_FIELD(cHaltBlocking);
2950 MAYBE_RESET_FIELD(cHaltTimeouts);
2951 MAYBE_RESET_FIELD(cHaltNotBlocking);
2952 MAYBE_RESET_FIELD(cHaltWakeUps);
2953 MAYBE_RESET_FIELD(cWakeUpCalls);
2954 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2955 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2956 MAYBE_RESET_FIELD(cPokeCalls);
2957 MAYBE_RESET_FIELD(cPokeNotBusy);
2958 MAYBE_RESET_FIELD(cPollCalls);
2959 MAYBE_RESET_FIELD(cPollHalts);
2960 MAYBE_RESET_FIELD(cPollWakeUps);
2961# undef MAYBE_RESET_FIELD
2962 }
2963 else
2964 {
2965 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2966
2967 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2968 AssertRCReturn(rc, rc);
2969 }
2970
2971 /*
2972 * Enumerate the VMs and add the ones visible to the statistics.
2973 */
2974 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2975 {
2976 for (unsigned i = pGVMM->iUsedHead;
2977 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2978 i = pGVMM->aHandles[i].iNext)
2979 {
2980 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2981 void *pvObj = pGVMM->aHandles[i].pvObj;
2982 if ( RT_VALID_PTR(pvObj)
2983 && RT_VALID_PTR(pOtherGVM)
2984 && pOtherGVM->u32Magic == GVM_MAGIC
2985 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2986 {
2987# define MAYBE_RESET_FIELD(field) \
2988 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2989 MAYBE_RESET_FIELD(cHaltCalls);
2990 MAYBE_RESET_FIELD(cHaltBlocking);
2991 MAYBE_RESET_FIELD(cHaltTimeouts);
2992 MAYBE_RESET_FIELD(cHaltNotBlocking);
2993 MAYBE_RESET_FIELD(cHaltWakeUps);
2994 MAYBE_RESET_FIELD(cWakeUpCalls);
2995 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2996 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2997 MAYBE_RESET_FIELD(cPokeCalls);
2998 MAYBE_RESET_FIELD(cPokeNotBusy);
2999 MAYBE_RESET_FIELD(cPollCalls);
3000 MAYBE_RESET_FIELD(cPollHalts);
3001 MAYBE_RESET_FIELD(cPollWakeUps);
3002# undef MAYBE_RESET_FIELD
3003 }
3004 }
3005 }
3006
3007 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3008
3009 return VINF_SUCCESS;
3010}
3011
3012
3013/**
3014 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3015 *
3016 * @returns see GVMMR0ResetStatistics.
3017 * @param pGVM The global (ring-0) VM structure. Optional.
3018 * @param pReq Pointer to the request packet.
3019 * @param pSession The current session.
3020 */
3021GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3022{
3023 /*
3024 * Validate input and pass it on.
3025 */
3026 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3027 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3028 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3029
3030 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3031}
3032
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette