VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 46150

Last change on this file since 46150 was 44528, checked in by vboxsync, 12 years ago

header (C) fixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 86.5 KB
1/* $Id: GVMMR0.cpp 44528 2013-02-04 14:27:54Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of this takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
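
/* A quick sanity check of the ~160 ms window mentioned above, assuming the
 * defaults defined further down in this file (8 history entries of 20 ms each);
 * purely illustrative:
 *
 * @code
 *      uint64_t cNsWindow = RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory)  // 8 entries
 *                         * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS;               // 20 000 000 ns each
 *      // => 8 * 20 000 000 ns = 160 000 000 ns, i.e. roughly 160 ms.
 * @endcode
 */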
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62
63#include <iprt/asm.h>
64#include <iprt/asm-amd64-x86.h>
65#include <iprt/mem.h>
66#include <iprt/semaphore.h>
67#include <iprt/time.h>
68#include <VBox/log.h>
69#include <iprt/thread.h>
70#include <iprt/process.h>
71#include <iprt/param.h>
72#include <iprt/string.h>
73#include <iprt/assert.h>
74#include <iprt/mem.h>
75#include <iprt/memobj.h>
76#include <iprt/mp.h>
77#include <iprt/cpuset.h>
78#include <iprt/spinlock.h>
79#include <iprt/timer.h>
80
81#include "dtrace/VBoxVMM.h"
82
83
84/*******************************************************************************
85* Defined Constants And Macros *
86*******************************************************************************/
87#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
88/** Define this to enable the periodic preemption timer. */
89# define GVMM_SCHED_WITH_PPT
90#endif
91
92
93/*******************************************************************************
94* Structures and Typedefs *
95*******************************************************************************/
96
97/**
98 * Global VM handle.
99 */
100typedef struct GVMHANDLE
101{
102 /** The index of the next handle in the list (free or used). (0 is nil.) */
103 uint16_t volatile iNext;
104 /** Our own index / handle value. */
105 uint16_t iSelf;
106 /** The process ID of the handle owner.
107 * This is used for access checks. */
108 RTPROCESS ProcId;
109 /** The pointer to the ring-0 only (aka global) VM structure. */
110 PGVM pGVM;
111 /** The ring-0 mapping of the shared VM instance data. */
112 PVM pVM;
113 /** The virtual machine object. */
114 void *pvObj;
115 /** The session this VM is associated with. */
116 PSUPDRVSESSION pSession;
117 /** The ring-0 handle of the EMT0 thread.
118 * This is used for ownership checks as well as looking up a VM handle by thread
119 * at times like assertions. */
120 RTNATIVETHREAD hEMT0;
121} GVMHANDLE;
122/** Pointer to a global VM handle. */
123typedef GVMHANDLE *PGVMHANDLE;
124
125/** Number of GVM handles (including the NIL handle). */
126#if HC_ARCH_BITS == 64
127# define GVMM_MAX_HANDLES 8192
128#else
129# define GVMM_MAX_HANDLES 128
130#endif
131
132/**
133 * Per host CPU GVMM data.
134 */
135typedef struct GVMMHOSTCPU
136{
137 /** Magic number (GVMMHOSTCPU_MAGIC). */
138 uint32_t volatile u32Magic;
139 /** The CPU ID. */
140 RTCPUID idCpu;
141 /** The CPU set index. */
142 uint32_t idxCpuSet;
143
144#ifdef GVMM_SCHED_WITH_PPT
145 /** Periodic preemption timer data. */
146 struct
147 {
148 /** The handle to the periodic preemption timer. */
149 PRTTIMER pTimer;
150 /** Spinlock protecting the data below. */
151 RTSPINLOCK hSpinlock;
152 /** The smallest Hz that we need to care about. (static) */
153 uint32_t uMinHz;
154 /** The number of ticks between each historization. */
155 uint32_t cTicksHistoriziationInterval;
156 /** The current historization tick (counting up to
157 * cTicksHistoriziationInterval and then resetting). */
158 uint32_t iTickHistorization;
159 /** The current timer interval. This is set to 0 when inactive. */
160 uint32_t cNsInterval;
161 /** The current timer frequency. This is set to 0 when inactive. */
162 uint32_t uTimerHz;
163 /** The current max frequency reported by the EMTs.
164 * This gets historicized and reset by the timer callback. This is
165 * read without holding the spinlock, so needs atomic updating. */
166 uint32_t volatile uDesiredHz;
167 /** Whether the timer was started or not. */
168 bool volatile fStarted;
169 /** Set if we're starting the timer. */
170 bool volatile fStarting;
171 /** The index of the next history entry (mod it). */
172 uint32_t iHzHistory;
173 /** Historicized uDesiredHz values. The array wraps around, new entries
174 * are added at iHzHistory. This is updated approximately every
175 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
176 uint32_t aHzHistory[8];
177 /** Statistics counter for recording the number of interval changes. */
178 uint32_t cChanges;
179 /** Statistics counter for recording the number of timer starts. */
180 uint32_t cStarts;
181 } Ppt;
182#endif /* GVMM_SCHED_WITH_PPT */
183
184} GVMMHOSTCPU;
185/** Pointer to the per host CPU GVMM data. */
186typedef GVMMHOSTCPU *PGVMMHOSTCPU;
187/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
188#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
189 /** The interval one history entry should cover (approximately), given in
190 * nanoseconds. */
191#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
192
193
194/**
195 * The GVMM instance data.
196 */
197typedef struct GVMM
198{
199 /** Eyecatcher / magic. */
200 uint32_t u32Magic;
201 /** The index of the head of the free handle chain. (0 is nil.) */
202 uint16_t volatile iFreeHead;
203 /** The index of the head of the active handle chain. (0 is nil.) */
204 uint16_t volatile iUsedHead;
205 /** The number of VMs. */
206 uint16_t volatile cVMs;
207 /** Alignment padding. */
208 uint16_t u16Reserved;
209 /** The number of EMTs. */
210 uint32_t volatile cEMTs;
211 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
212 uint32_t volatile cHaltedEMTs;
213 /** Alignment padding. */
214 uint32_t u32Alignment;
215 /** When the next halted or sleeping EMT will wake up.
216 * This is set to 0 when it needs recalculating and to UINT64_MAX when
217 * there are no halted or sleeping EMTs in the GVMM. */
218 uint64_t uNsNextEmtWakeup;
219 /** The lock used to serialize VM creation, destruction and associated events that
220 * aren't performance critical. Owners may acquire the list lock. */
221 RTSEMFASTMUTEX CreateDestroyLock;
222 /** The lock used to serialize used list updates and accesses.
223 * This indirectly includes scheduling since the scheduler will have to walk the
224 * used list to examine running VMs. Owners may not acquire any other locks. */
225 RTSEMFASTMUTEX UsedLock;
226 /** The handle array.
227 * The size of this array defines the maximum number of currently running VMs.
228 * The first entry is unused as it represents the NIL handle. */
229 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
230
231 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
232 * The number of EMTs that means we no longer consider ourselves alone on a
233 * CPU/Core.
234 */
235 uint32_t cEMTsMeansCompany;
236 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
237 * The minimum sleep time for when we're alone, in nanoseconds.
238 */
239 uint32_t nsMinSleepAlone;
240 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
241 * The minimum sleep time for when we've got company, in nanoseconds.
242 */
243 uint32_t nsMinSleepCompany;
244 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
245 * The limit for the first round of early wakeups, given in nanoseconds.
246 */
247 uint32_t nsEarlyWakeUp1;
248 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
249 * The limit for the second round of early wakeups, given in nanoseconds.
250 */
251 uint32_t nsEarlyWakeUp2;
252
253 /** The number of entries in the host CPU array (aHostCpus). */
254 uint32_t cHostCpus;
255 /** Per host CPU data (variable length). */
256 GVMMHOSTCPU aHostCpus[1];
257} GVMM;
258/** Pointer to the GVMM instance data. */
259typedef GVMM *PGVMM;
260
261/** The GVMM::u32Magic value (Charlie Haden). */
262#define GVMM_MAGIC UINT32_C(0x19370806)
263
264
265
266/*******************************************************************************
267* Global Variables *
268*******************************************************************************/
269/** Pointer to the GVMM instance data.
270 * (Just my general dislike for global variables.) */
271static PGVMM g_pGVMM = NULL;
272
273/** Macro for obtaining and validating the g_pGVMM pointer.
274 * On failure it will return from the invoking function with the specified return value.
275 *
276 * @param pGVMM The name of the pGVMM variable.
277 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
278 * status codes.
279 */
280#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
281 do { \
282 (pGVMM) = g_pGVMM;\
283 AssertPtrReturn((pGVMM), (rc)); \
284 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
285 } while (0)
286
287/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
288 * On failure it will return from the invoking function.
289 *
290 * @param pGVMM The name of the pGVMM variable.
291 */
292#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
293 do { \
294 (pGVMM) = g_pGVMM;\
295 AssertPtrReturnVoid((pGVMM)); \
296 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
297 } while (0)
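
/* Illustrative only: how these macros are meant to be used at the top of an
 * exported GVMM entry point (the function name below is made up):
 *
 * @code
 *  GVMMR0DECL(int) GVMMR0IllustrativeEntryPoint(void)
 *  {
 *      PGVMM pGVMM;
 *      GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); // bails out on NULL or bad magic
 *      // ... use pGVMM ...
 *      return VINF_SUCCESS;
 *  }
 * @endcode
 */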
298
299
300/*******************************************************************************
301* Internal Functions *
302*******************************************************************************/
303static void gvmmR0InitPerVMData(PGVM pGVM);
304static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
305static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
306static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
307#ifdef GVMM_SCHED_WITH_PPT
308static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
309#endif
310
311
312/**
313 * Initializes the GVMM.
314 *
315 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
316 *
317 * @returns VBox status code.
318 */
319GVMMR0DECL(int) GVMMR0Init(void)
320{
321 LogFlow(("GVMMR0Init:\n"));
322
323 /*
324 * Allocate and initialize the instance data.
325 */
326 uint32_t cHostCpus = RTMpGetArraySize();
327 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
328
329 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
330 if (!pGVMM)
331 return VERR_NO_MEMORY;
332 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
333 if (RT_SUCCESS(rc))
334 {
335 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
336 if (RT_SUCCESS(rc))
337 {
338 pGVMM->u32Magic = GVMM_MAGIC;
339 pGVMM->iUsedHead = 0;
340 pGVMM->iFreeHead = 1;
341
342 /* the nil handle */
343 pGVMM->aHandles[0].iSelf = 0;
344 pGVMM->aHandles[0].iNext = 0;
345
346 /* the tail */
347 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
348 pGVMM->aHandles[i].iSelf = i;
349 pGVMM->aHandles[i].iNext = 0; /* nil */
350
351 /* the rest */
352 while (i-- > 1)
353 {
354 pGVMM->aHandles[i].iSelf = i;
355 pGVMM->aHandles[i].iNext = i + 1;
356 }
357
358 /* The default configuration values. */
359 uint32_t cNsResolution = RTSemEventMultiGetResolution();
360 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
361 if (cNsResolution >= 5*RT_NS_100US)
362 {
363 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
364 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
365 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
366 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
367 }
368 else if (cNsResolution > RT_NS_100US)
369 {
370 pGVMM->nsMinSleepAlone = cNsResolution / 2;
371 pGVMM->nsMinSleepCompany = cNsResolution / 4;
372 pGVMM->nsEarlyWakeUp1 = 0;
373 pGVMM->nsEarlyWakeUp2 = 0;
374 }
375 else
376 {
377 pGVMM->nsMinSleepAlone = 2000;
378 pGVMM->nsMinSleepCompany = 2000;
379 pGVMM->nsEarlyWakeUp1 = 0;
380 pGVMM->nsEarlyWakeUp2 = 0;
381 }
382
383 /* The host CPU data. */
384 pGVMM->cHostCpus = cHostCpus;
385 uint32_t iCpu = cHostCpus;
386 RTCPUSET PossibleSet;
387 RTMpGetSet(&PossibleSet);
388 while (iCpu-- > 0)
389 {
390 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
391#ifdef GVMM_SCHED_WITH_PPT
392 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
393 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
394 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
395 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
396 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
397 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
398 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
399 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
400 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
401 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
402 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
403 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
404#endif
405
406 if (RTCpuSetIsMember(&PossibleSet, iCpu))
407 {
408 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
409 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
410
411#ifdef GVMM_SCHED_WITH_PPT
412 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
413 50*1000*1000 /* whatever */,
414 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
415 gvmmR0SchedPeriodicPreemptionTimerCallback,
416 &pGVMM->aHostCpus[iCpu]);
417 if (RT_SUCCESS(rc))
418 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
419 if (RT_FAILURE(rc))
420 {
421 while (iCpu < cHostCpus)
422 {
423 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
424 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
425 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
426 iCpu++;
427 }
428 break;
429 }
430#endif
431 }
432 else
433 {
434 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
435 pGVMM->aHostCpus[iCpu].u32Magic = 0;
436 }
437 }
438 if (RT_SUCCESS(rc))
439 {
440 g_pGVMM = pGVMM;
441 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
442 return VINF_SUCCESS;
443 }
444
445 /* bail out. */
446 RTSemFastMutexDestroy(pGVMM->UsedLock);
447 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
448 }
449 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
450 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
451 }
452
453 RTMemFree(pGVMM);
454 return rc;
455}
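
/* A sketch (not normative) of the handle array layout right after a successful
 * GVMMR0Init, tying together the loop above and the alloc/free logic below:
 *
 *      iUsedHead = 0 (empty), iFreeHead = 1
 *      aHandles[0]              - the NIL handle, iNext = 0
 *      aHandles[1 .. N-2].iNext = i + 1      (singly linked free list)
 *      aHandles[N-1].iNext      = 0          (nil terminator)
 *
 * GVMMR0CreateVM pops the head of the free list and pushes it onto the used
 * list; gvmmR0HandleObjDestructor does the reverse.
 */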
456
457
458/**
459 * Terminates the GVMM.
460 *
461 * This is called while owning the loader semaphore (see supdrvLdrFree()).
462 * And unless something is wrong, there should be absolutely no VMs
463 * registered at this point.
464 */
465GVMMR0DECL(void) GVMMR0Term(void)
466{
467 LogFlow(("GVMMR0Term:\n"));
468
469 PGVMM pGVMM = g_pGVMM;
470 g_pGVMM = NULL;
471 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
472 {
473 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
474 return;
475 }
476
477 /*
478 * First of all, stop all active timers.
479 */
480 uint32_t cActiveTimers = 0;
481 uint32_t iCpu = pGVMM->cHostCpus;
482 while (iCpu-- > 0)
483 {
484 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
485#ifdef GVMM_SCHED_WITH_PPT
486 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
487 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
488 cActiveTimers++;
489#endif
490 }
491 if (cActiveTimers)
492 RTThreadSleep(1); /* fudge */
493
494 /*
495 * Invalidate the instance and free the resources.
496 */
497 pGVMM->u32Magic = ~GVMM_MAGIC;
498 RTSemFastMutexDestroy(pGVMM->UsedLock);
499 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
500 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
501 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
502
503 pGVMM->iFreeHead = 0;
504 if (pGVMM->iUsedHead)
505 {
506 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
507 pGVMM->iUsedHead = 0;
508 }
509
510#ifdef GVMM_SCHED_WITH_PPT
511 iCpu = pGVMM->cHostCpus;
512 while (iCpu-- > 0)
513 {
514 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
515 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
516 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
517 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
518 }
519#endif
520
521 RTMemFree(pGVMM);
522}
523
524
525/**
526 * A quick hack for setting global config values.
527 *
528 * @returns VBox status code.
529 *
530 * @param pSession The session handle. Used for authentication.
531 * @param pszName The variable name.
532 * @param u64Value The new value.
533 */
534GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
535{
536 /*
537 * Validate input.
538 */
539 PGVMM pGVMM;
540 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
541 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
542 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
543
544 /*
545 * String switch time!
546 */
547 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
548 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
549 int rc = VINF_SUCCESS;
550 pszName += sizeof("/GVMM/") - 1;
551 if (!strcmp(pszName, "cEMTsMeansCompany"))
552 {
553 if (u64Value <= UINT32_MAX)
554 pGVMM->cEMTsMeansCompany = u64Value;
555 else
556 rc = VERR_OUT_OF_RANGE;
557 }
558 else if (!strcmp(pszName, "MinSleepAlone"))
559 {
560 if (u64Value <= RT_NS_100MS)
561 pGVMM->nsMinSleepAlone = u64Value;
562 else
563 rc = VERR_OUT_OF_RANGE;
564 }
565 else if (!strcmp(pszName, "MinSleepCompany"))
566 {
567 if (u64Value <= RT_NS_100MS)
568 pGVMM->nsMinSleepCompany = u64Value;
569 else
570 rc = VERR_OUT_OF_RANGE;
571 }
572 else if (!strcmp(pszName, "EarlyWakeUp1"))
573 {
574 if (u64Value <= RT_NS_100MS)
575 pGVMM->nsEarlyWakeUp1 = u64Value;
576 else
577 rc = VERR_OUT_OF_RANGE;
578 }
579 else if (!strcmp(pszName, "EarlyWakeUp2"))
580 {
581 if (u64Value <= RT_NS_100MS)
582 pGVMM->nsEarlyWakeUp2 = u64Value;
583 else
584 rc = VERR_OUT_OF_RANGE;
585 }
586 else
587 rc = VERR_CFGM_VALUE_NOT_FOUND;
588 return rc;
589}
590
591
592/**
593 * A quick hack for getting global config values.
594 *
595 * @returns VBox status code.
596 *
597 * @param pSession The session handle. Used for authentication.
598 * @param pszName The variable name.
599 * @param pu64Value Where to return the value.
600 */
601GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
602{
603 /*
604 * Validate input.
605 */
606 PGVMM pGVMM;
607 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
608 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
609 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
610 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
611
612 /*
613 * String switch time!
614 */
615 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
616 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
617 int rc = VINF_SUCCESS;
618 pszName += sizeof("/GVMM/") - 1;
619 if (!strcmp(pszName, "cEMTsMeansCompany"))
620 *pu64Value = pGVMM->cEMTsMeansCompany;
621 else if (!strcmp(pszName, "MinSleepAlone"))
622 *pu64Value = pGVMM->nsMinSleepAlone;
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 *pu64Value = pGVMM->nsMinSleepCompany;
625 else if (!strcmp(pszName, "EarlyWakeUp1"))
626 *pu64Value = pGVMM->nsEarlyWakeUp1;
627 else if (!strcmp(pszName, "EarlyWakeUp2"))
628 *pu64Value = pGVMM->nsEarlyWakeUp2;
629 else
630 rc = VERR_CFGM_VALUE_NOT_FOUND;
631 return rc;
632}
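
/* A hypothetical ring-0 caller tuning the scheduler; the config names mirror
 * the strings handled above, and pSession is assumed to be a valid support
 * driver session:
 *
 * @code
 *  uint64_t u64Value;
 *  int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000); // 0.5 ms
 *  if (RT_SUCCESS(rc))
 *      rc = GVMMR0QueryConfig(pSession, "/GVMM/MinSleepAlone", &u64Value); // reads back 500000
 * @endcode
 */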
633
634
635/**
636 * Try acquire the 'used' lock.
637 *
638 * @returns IPRT status code, see RTSemFastMutexRequest.
639 * @param pGVMM The GVMM instance data.
640 */
641DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
642{
643 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
644 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
645 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
646 return rc;
647}
648
649
650/**
651 * Release the 'used' lock.
652 *
653 * @returns IPRT status code, see RTSemFastMutexRelease.
654 * @param pGVMM The GVMM instance data.
655 */
656DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
657{
658 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
659 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
660 AssertRC(rc);
661 return rc;
662}
663
664
665/**
666 * Try acquire the 'create & destroy' lock.
667 *
668 * @returns IPRT status code, see RTSemFastMutexRequest.
669 * @param pGVMM The GVMM instance data.
670 */
671DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
672{
673 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
674 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
675 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
676 return rc;
677}
678
679
680/**
681 * Release the 'create & destroy' lock.
682 *
683 * @returns IPRT status code, see RTSemFastMutexRelease.
684 * @param pGVMM The GVMM instance data.
685 */
686DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
687{
688 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
689 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
690 AssertRC(rc);
691 return rc;
692}
693
694
695/**
696 * Request wrapper for the GVMMR0CreateVM API.
697 *
698 * @returns VBox status code.
699 * @param pReq The request buffer.
700 */
701GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
702{
703 /*
704 * Validate the request.
705 */
706 if (!VALID_PTR(pReq))
707 return VERR_INVALID_POINTER;
708 if (pReq->Hdr.cbReq != sizeof(*pReq))
709 return VERR_INVALID_PARAMETER;
710 if (!VALID_PTR(pReq->pSession))
711 return VERR_INVALID_POINTER;
712
713 /*
714 * Execute it.
715 */
716 PVM pVM;
717 pReq->pVMR0 = NULL;
718 pReq->pVMR3 = NIL_RTR3PTR;
719 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
720 if (RT_SUCCESS(rc))
721 {
722 pReq->pVMR0 = pVM;
723 pReq->pVMR3 = pVM->pVMR3;
724 }
725 return rc;
726}
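
/* A sketch of how the request buffer is typically filled in before being passed
 * down here. The fields match the validation and usage above; the header magic
 * (SUPVMMR0REQHDR_MAGIC) and the actual ring-3 submission path are assumptions:
 *
 * @code
 *  GVMMCREATEVMREQ CreateVMReq;
 *  CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *  CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);
 *  CreateVMReq.pSession     = pSession;   // the caller's support driver session
 *  CreateVMReq.cCpus        = cCpus;      // 1..VMM_MAX_CPU_COUNT
 *  CreateVMReq.pVMR0        = NIL_RTR0PTR;
 *  CreateVMReq.pVMR3        = NIL_RTR3PTR;
 *  // ... submit the request; on success CreateVMReq.pVMR3 is the ring-3 VM mapping.
 * @endcode
 */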
727
728
729/**
730 * Allocates the VM structure and registers it with GVM.
731 *
732 * The caller will become the VM owner and thereby the EMT.
733 *
734 * @returns VBox status code.
735 * @param pSession The support driver session.
736 * @param cCpus Number of virtual CPUs for the new VM.
737 * @param ppVM Where to store the pointer to the VM structure.
738 *
739 * @thread EMT.
740 */
741GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
742{
743 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
744 PGVMM pGVMM;
745 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
746
747 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
748 *ppVM = NULL;
749
750 if ( cCpus == 0
751 || cCpus > VMM_MAX_CPU_COUNT)
752 return VERR_INVALID_PARAMETER;
753
754 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
755 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
756 RTPROCESS ProcId = RTProcSelf();
757 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
758
759 /*
760 * The whole allocation process is protected by the lock.
761 */
762 int rc = gvmmR0CreateDestroyLock(pGVMM);
763 AssertRCReturn(rc, rc);
764
765 /*
766 * Allocate a handle first so we don't waste resources unnecessarily.
767 */
768 uint16_t iHandle = pGVMM->iFreeHead;
769 if (iHandle)
770 {
771 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
772
773 /* consistency checks, a bit paranoid as always. */
774 if ( !pHandle->pVM
775 && !pHandle->pGVM
776 && !pHandle->pvObj
777 && pHandle->iSelf == iHandle)
778 {
779 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
780 if (pHandle->pvObj)
781 {
782 /*
783 * Move the handle from the free to used list and perform permission checks.
784 */
785 rc = gvmmR0UsedLock(pGVMM);
786 AssertRC(rc);
787
788 pGVMM->iFreeHead = pHandle->iNext;
789 pHandle->iNext = pGVMM->iUsedHead;
790 pGVMM->iUsedHead = iHandle;
791 pGVMM->cVMs++;
792
793 pHandle->pVM = NULL;
794 pHandle->pGVM = NULL;
795 pHandle->pSession = pSession;
796 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
797 pHandle->ProcId = NIL_RTPROCESS;
798
799 gvmmR0UsedUnlock(pGVMM);
800
801 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
802 if (RT_SUCCESS(rc))
803 {
804 /*
805 * Allocate the global VM structure (GVM) and initialize it.
806 */
807 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
808 if (pGVM)
809 {
810 pGVM->u32Magic = GVM_MAGIC;
811 pGVM->hSelf = iHandle;
812 pGVM->pVM = NULL;
813 pGVM->cCpus = cCpus;
814
815 gvmmR0InitPerVMData(pGVM);
816 GMMR0InitPerVMData(pGVM);
817
818 /*
819 * Allocate the shared VM structure and associated page array.
820 */
821 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
822 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
823 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
824 if (RT_SUCCESS(rc))
825 {
826 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
827 memset(pVM, 0, cPages << PAGE_SHIFT);
828 pVM->enmVMState = VMSTATE_CREATING;
829 pVM->pVMR0 = pVM;
830 pVM->pSession = pSession;
831 pVM->hSelf = iHandle;
832 pVM->cbSelf = cbVM;
833 pVM->cCpus = cCpus;
834 pVM->uCpuExecutionCap = 100; /* default is no cap. */
835 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
836 AssertCompileMemberAlignment(VM, cpum, 64);
837 AssertCompileMemberAlignment(VM, tm, 64);
838 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
839
840 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
841 if (RT_SUCCESS(rc))
842 {
843 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
844 for (uint32_t iPage = 0; iPage < cPages; iPage++)
845 {
846 paPages[iPage].uReserved = 0;
847 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
848 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
849 }
850
851 /*
852 * Map them into ring-3.
853 */
854 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
855 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
856 if (RT_SUCCESS(rc))
857 {
858 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
859 AssertPtr((void *)pVM->pVMR3);
860
861 /* Initialize all the VM pointers. */
862 for (uint32_t i = 0; i < cCpus; i++)
863 {
864 pVM->aCpus[i].pVMR0 = pVM;
865 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
866 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
867 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
868 }
869
870 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
871 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
872 if (RT_SUCCESS(rc))
873 {
874 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
875 AssertPtr((void *)pVM->paVMPagesR3);
876
877 /* complete the handle - take the UsedLock sem just to be careful. */
878 rc = gvmmR0UsedLock(pGVMM);
879 AssertRC(rc);
880
881 pHandle->pVM = pVM;
882 pHandle->pGVM = pGVM;
883 pHandle->hEMT0 = hEMT0;
884 pHandle->ProcId = ProcId;
885 pGVM->pVM = pVM;
886 pGVM->aCpus[0].hEMT = hEMT0;
887 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
888 pGVMM->cEMTs += cCpus;
889
890 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
891
892 gvmmR0UsedUnlock(pGVMM);
893 gvmmR0CreateDestroyUnlock(pGVMM);
894
895 *ppVM = pVM;
896 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
897 return VINF_SUCCESS;
898 }
899
900 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
901 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
902 }
903 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
904 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
905 }
906 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
907 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
908 }
909 }
910 }
911 /* else: The user wasn't permitted to create this VM. */
912
913 /*
914 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
915 * object reference here. A little extra mess because of non-recursive lock.
916 */
917 void *pvObj = pHandle->pvObj;
918 pHandle->pvObj = NULL;
919 gvmmR0CreateDestroyUnlock(pGVMM);
920
921 SUPR0ObjRelease(pvObj, pSession);
922
923 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
924 return rc;
925 }
926
927 rc = VERR_NO_MEMORY;
928 }
929 else
930 rc = VERR_GVMM_IPE_1;
931 }
932 else
933 rc = VERR_GVM_TOO_MANY_VMS;
934
935 gvmmR0CreateDestroyUnlock(pGVMM);
936 return rc;
937}
938
939
940/**
941 * Initializes the per VM data belonging to GVMM.
942 *
943 * @param pGVM Pointer to the global VM structure.
944 */
945static void gvmmR0InitPerVMData(PGVM pGVM)
946{
947 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
948 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
949 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
950 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
951 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
952 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
953 pGVM->gvmm.s.fDoneVMMR0Init = false;
954 pGVM->gvmm.s.fDoneVMMR0Term = false;
955
956 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
957 {
958 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
959 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
960 }
961}
962
963
964/**
965 * Does the VM initialization.
966 *
967 * @returns VBox status code.
968 * @param pVM Pointer to the VM.
969 */
970GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
971{
972 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
973
974 /*
975 * Validate the VM structure, state and handle.
976 */
977 PGVM pGVM;
978 PGVMM pGVMM;
979 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
980 if (RT_SUCCESS(rc))
981 {
982 if ( !pGVM->gvmm.s.fDoneVMMR0Init
983 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
984 {
985 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
986 {
987 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
988 if (RT_FAILURE(rc))
989 {
990 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
991 break;
992 }
993 }
994 }
995 else
996 rc = VERR_WRONG_ORDER;
997 }
998
999 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1000 return rc;
1001}
1002
1003
1004/**
1005 * Indicates that we're done with the ring-0 initialization
1006 * of the VM.
1007 *
1008 * @param pVM Pointer to the VM.
1009 * @thread EMT(0)
1010 */
1011GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1012{
1013 /* Validate the VM structure, state and handle. */
1014 PGVM pGVM;
1015 PGVMM pGVMM;
1016 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1017 AssertRCReturnVoid(rc);
1018
1019 /* Set the indicator. */
1020 pGVM->gvmm.s.fDoneVMMR0Init = true;
1021}
1022
1023
1024/**
1025 * Indicates that we're doing the ring-0 termination of the VM.
1026 *
1027 * @returns true if termination hasn't been done already, false if it has.
1028 * @param pVM Pointer to the VM.
1029 * @param pGVM Pointer to the global VM structure. Optional.
1030 * @thread EMT(0)
1031 */
1032GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1033{
1034 /* Validate the VM structure, state and handle. */
1035 AssertPtrNullReturn(pGVM, false);
1036 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1037 if (!pGVM)
1038 {
1039 PGVMM pGVMM;
1040 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1041 AssertRCReturn(rc, false);
1042 }
1043
1044 /* Set the indicator. */
1045 if (pGVM->gvmm.s.fDoneVMMR0Term)
1046 return false;
1047 pGVM->gvmm.s.fDoneVMMR0Term = true;
1048 return true;
1049}
1050
1051
1052/**
1053 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1054 *
1055 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1056 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1057 * would've been nice if the caller had actually been the EMT thread or if we somehow
1058 * could've associated the calling thread with the VM up front.
1059 *
1060 * @returns VBox status code.
1061 * @param pVM Pointer to the VM.
1062 *
1063 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1064 */
1065GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1066{
1067 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1068 PGVMM pGVMM;
1069 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1070
1071
1072 /*
1073 * Validate the VM structure, state and caller.
1074 */
1075 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1076 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1077 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1078
1079 uint32_t hGVM = pVM->hSelf;
1080 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1081 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1082
1083 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1084 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1085
1086 RTPROCESS ProcId = RTProcSelf();
1087 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1088 AssertReturn( ( pHandle->hEMT0 == hSelf
1089 && pHandle->ProcId == ProcId)
1090 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1091
1092 /*
1093 * Lookup the handle and destroy the object.
1094 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1095 * object, we take some precautions against racing callers just in case...
1096 */
1097 int rc = gvmmR0CreateDestroyLock(pGVMM);
1098 AssertRC(rc);
1099
1100 /* be careful here because we might theoretically be racing someone else cleaning up. */
1101 if ( pHandle->pVM == pVM
1102 && ( ( pHandle->hEMT0 == hSelf
1103 && pHandle->ProcId == ProcId)
1104 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1105 && VALID_PTR(pHandle->pvObj)
1106 && VALID_PTR(pHandle->pSession)
1107 && VALID_PTR(pHandle->pGVM)
1108 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1109 {
1110 void *pvObj = pHandle->pvObj;
1111 pHandle->pvObj = NULL;
1112 gvmmR0CreateDestroyUnlock(pGVMM);
1113
1114 SUPR0ObjRelease(pvObj, pHandle->pSession);
1115 }
1116 else
1117 {
1118 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1119 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1120 gvmmR0CreateDestroyUnlock(pGVMM);
1121 rc = VERR_GVMM_IPE_2;
1122 }
1123
1124 return rc;
1125}
1126
1127
1128/**
1129 * Performs VM cleanup task as part of object destruction.
1130 *
1131 * @param pGVM The GVM pointer.
1132 */
1133static void gvmmR0CleanupVM(PGVM pGVM)
1134{
1135 if ( pGVM->gvmm.s.fDoneVMMR0Init
1136 && !pGVM->gvmm.s.fDoneVMMR0Term)
1137 {
1138 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1139 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1140 {
1141 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1142 VMMR0TermVM(pGVM->pVM, pGVM);
1143 }
1144 else
1145 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1146 }
1147
1148 GMMR0CleanupVM(pGVM);
1149}
1150
1151
1152/**
1153 * Handle destructor.
1154 *
1155 * @param pvGVMM The GVMM instance pointer.
1156 * @param pvHandle The handle pointer.
1157 */
1158static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1159{
1160 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1161
1162 /*
1163 * Some quick, paranoid, input validation.
1164 */
1165 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1166 AssertPtr(pHandle);
1167 PGVMM pGVMM = (PGVMM)pvGVMM;
1168 Assert(pGVMM == g_pGVMM);
1169 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1170 if ( !iHandle
1171 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1172 || iHandle != pHandle->iSelf)
1173 {
1174 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1175 return;
1176 }
1177
1178 int rc = gvmmR0CreateDestroyLock(pGVMM);
1179 AssertRC(rc);
1180 rc = gvmmR0UsedLock(pGVMM);
1181 AssertRC(rc);
1182
1183 /*
1184 * This is a tad slow but a doubly linked list is too much hassle.
1185 */
1186 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1187 {
1188 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1189 gvmmR0UsedUnlock(pGVMM);
1190 gvmmR0CreateDestroyUnlock(pGVMM);
1191 return;
1192 }
1193
1194 if (pGVMM->iUsedHead == iHandle)
1195 pGVMM->iUsedHead = pHandle->iNext;
1196 else
1197 {
1198 uint16_t iPrev = pGVMM->iUsedHead;
1199 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1200 while (iPrev)
1201 {
1202 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1203 {
1204 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1205 gvmmR0UsedUnlock(pGVMM);
1206 gvmmR0CreateDestroyUnlock(pGVMM);
1207 return;
1208 }
1209 if (RT_UNLIKELY(c-- <= 0))
1210 {
1211 iPrev = 0;
1212 break;
1213 }
1214
1215 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1216 break;
1217 iPrev = pGVMM->aHandles[iPrev].iNext;
1218 }
1219 if (!iPrev)
1220 {
1221 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1222 gvmmR0UsedUnlock(pGVMM);
1223 gvmmR0CreateDestroyUnlock(pGVMM);
1224 return;
1225 }
1226
1227 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1228 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1229 }
1230 pHandle->iNext = 0;
1231 pGVMM->cVMs--;
1232
1233 /*
1234 * Do the global cleanup round.
1235 */
1236 PGVM pGVM = pHandle->pGVM;
1237 if ( VALID_PTR(pGVM)
1238 && pGVM->u32Magic == GVM_MAGIC)
1239 {
1240 pGVMM->cEMTs -= pGVM->cCpus;
1241 gvmmR0UsedUnlock(pGVMM);
1242
1243 gvmmR0CleanupVM(pGVM);
1244
1245 /*
1246 * Do the GVMM cleanup - must be done last.
1247 */
1248 /* The VM and VM pages mappings/allocations. */
1249 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1250 {
1251 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1252 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1253 }
1254
1255 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1256 {
1257 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1258 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1259 }
1260
1261 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1262 {
1263 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1264 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1265 }
1266
1267 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1268 {
1269 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1270 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1271 }
1272
1273 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1274 {
1275 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1276 {
1277 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1278 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1279 }
1280 }
1281
1282 /* the GVM structure itself. */
1283 pGVM->u32Magic |= UINT32_C(0x80000000);
1284 RTMemFree(pGVM);
1285
1286 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1287 rc = gvmmR0UsedLock(pGVMM);
1288 AssertRC(rc);
1289 }
1290 /* else: GVMMR0CreateVM cleanup. */
1291
1292 /*
1293 * Free the handle.
1294 */
1295 pHandle->iNext = pGVMM->iFreeHead;
1296 pGVMM->iFreeHead = iHandle;
1297 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1298 ASMAtomicWriteNullPtr(&pHandle->pVM);
1299 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1300 ASMAtomicWriteNullPtr(&pHandle->pSession);
1301 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1302 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1303
1304 gvmmR0UsedUnlock(pGVMM);
1305 gvmmR0CreateDestroyUnlock(pGVMM);
1306 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1307}
1308
1309
1310/**
1311 * Registers the calling thread as the EMT of a Virtual CPU.
1312 *
1313 * Note that VCPU 0 is automatically registered during VM creation.
1314 *
1315 * @returns VBox status code
1316 * @param pVM Pointer to the VM.
1317 * @param idCpu VCPU id.
1318 */
1319GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1320{
1321 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1322
1323 /*
1324 * Validate the VM structure, state and handle.
1325 */
1326 PGVM pGVM;
1327 PGVMM pGVMM;
1328 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1329 if (RT_FAILURE(rc))
1330 return rc;
1331
1332 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1333 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1334 Assert(pGVM->cCpus == pVM->cCpus);
1335 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1336
1337 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1338
1339 return VINF_SUCCESS;
1340}
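
/* Illustrative flow for an additional EMT (idCpu 1..cCpus-1): the thread is
 * expected to register itself before any per-VCPU ring-0 calls are made on its
 * behalf; error handling is abbreviated:
 *
 * @code
 *  int rc = GVMMR0RegisterVCpu(pVM, idCpu);  // records RTThreadNativeSelf() as the VCPU's EMT
 *  AssertRCReturn(rc, rc);
 *  // From here on gvmmR0ByVMAndEMT(pVM, idCpu, ...) accepts this thread as owner.
 * @endcode
 */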
1341
1342
1343/**
1344 * Lookup a GVM structure by its handle.
1345 *
1346 * @returns The GVM pointer on success, NULL on failure.
1347 * @param hGVM The global VM handle. Asserts on bad handle.
1348 */
1349GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1350{
1351 PGVMM pGVMM;
1352 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1353
1354 /*
1355 * Validate.
1356 */
1357 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1358 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1359
1360 /*
1361 * Look it up.
1362 */
1363 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1364 AssertPtrReturn(pHandle->pVM, NULL);
1365 AssertPtrReturn(pHandle->pvObj, NULL);
1366 PGVM pGVM = pHandle->pGVM;
1367 AssertPtrReturn(pGVM, NULL);
1368 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1369
1370 return pHandle->pGVM;
1371}
1372
1373
1374/**
1375 * Lookup a GVM structure by the shared VM structure.
1376 *
1377 * The calling thread must be in the same process as the VM. All current lookups
1378 * are by threads inside the same process, so this will not be an issue.
1379 *
1380 * @returns VBox status code.
1381 * @param pVM Pointer to the VM.
1382 * @param ppGVM Where to store the GVM pointer.
1383 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1384 * @param fTakeUsedLock Whether to take the used lock or not.
1385 * Be very careful if not taking the lock as it's possible that
1386 * the VM will disappear then.
1387 *
1388 * @remark This will not assert on an invalid pVM but try to return silently.
1389 */
1390static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1391{
1392 RTPROCESS ProcId = RTProcSelf();
1393 PGVMM pGVMM;
1394 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1395
1396 /*
1397 * Validate.
1398 */
1399 if (RT_UNLIKELY( !VALID_PTR(pVM)
1400 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1401 return VERR_INVALID_POINTER;
1402 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1403 || pVM->enmVMState >= VMSTATE_TERMINATED))
1404 return VERR_INVALID_POINTER;
1405
1406 uint16_t hGVM = pVM->hSelf;
1407 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1408 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1409 return VERR_INVALID_HANDLE;
1410
1411 /*
1412 * Look it up.
1413 */
1414 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1415 PGVM pGVM;
1416 if (fTakeUsedLock)
1417 {
1418 int rc = gvmmR0UsedLock(pGVMM);
1419 AssertRCReturn(rc, rc);
1420
1421 pGVM = pHandle->pGVM;
1422 if (RT_UNLIKELY( pHandle->pVM != pVM
1423 || pHandle->ProcId != ProcId
1424 || !VALID_PTR(pHandle->pvObj)
1425 || !VALID_PTR(pGVM)
1426 || pGVM->pVM != pVM))
1427 {
1428 gvmmR0UsedUnlock(pGVMM);
1429 return VERR_INVALID_HANDLE;
1430 }
1431 }
1432 else
1433 {
1434 if (RT_UNLIKELY(pHandle->pVM != pVM))
1435 return VERR_INVALID_HANDLE;
1436 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1437 return VERR_INVALID_HANDLE;
1438 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1439 return VERR_INVALID_HANDLE;
1440
1441 pGVM = pHandle->pGVM;
1442 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1443 return VERR_INVALID_HANDLE;
1444 if (RT_UNLIKELY(pGVM->pVM != pVM))
1445 return VERR_INVALID_HANDLE;
1446 }
1447
1448 *ppGVM = pGVM;
1449 *ppGVMM = pGVMM;
1450 return VINF_SUCCESS;
1451}
1452
1453
1454/**
1455 * Lookup a GVM structure by the shared VM structure.
1456 *
1457 * @returns VBox status code.
1458 * @param pVM Pointer to the VM.
1459 * @param ppGVM Where to store the GVM pointer.
1460 *
1461 * @remark This will not take the 'used' lock because the lock doesn't
1462 * support nesting and this function will be used from under the lock.
1463 */
1464GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1465{
1466 PGVMM pGVMM;
1467 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1468}
1469
1470
1471/**
1472 * Lookup a GVM structure by the shared VM structure, ensuring that the
1473 * caller is an EMT thread.
1474 *
1475 * @returns VBox status code.
1476 * @param pVM Pointer to the VM.
1477 * @param idCpu The Virtual CPU ID of the calling EMT.
1478 * @param ppGVM Where to store the GVM pointer.
1479 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1480 * @thread EMT
1481 *
1482 * @remark This will assert in all failure paths.
1483 */
1484static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1485{
1486 PGVMM pGVMM;
1487 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1488
1489 /*
1490 * Validate.
1491 */
1492 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1493 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1494
1495 uint16_t hGVM = pVM->hSelf;
1496 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1497 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1498
1499 /*
1500 * Look it up.
1501 */
1502 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1503 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1504 RTPROCESS ProcId = RTProcSelf();
1505 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1506 AssertPtrReturn(pHandle->pvObj, VERR_NOT_OWNER);
1507
1508 PGVM pGVM = pHandle->pGVM;
1509 AssertPtrReturn(pGVM, VERR_NOT_OWNER);
1510 AssertReturn(pGVM->pVM == pVM, VERR_NOT_OWNER);
1511 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1512 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1513 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1514
1515 *ppGVM = pGVM;
1516 *ppGVMM = pGVMM;
1517 return VINF_SUCCESS;
1518}
1519
1520
1521/**
1522 * Lookup a GVM structure by the shared VM structure
1523 * and ensure that the caller is the EMT thread.
1524 *
1525 * @returns VBox status code.
1526 * @param pVM Pointer to the VM.
1527 * @param idCpu The Virtual CPU ID of the calling EMT.
1528 * @param ppGVM Where to store the GVM pointer.
1529 * @thread EMT
1530 */
1531GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1532{
1533 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1534 PGVMM pGVMM;
1535 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1536}
1537
1538
1539/**
1540 * Lookup a VM by its global handle.
1541 *
1542 * @returns Pointer to the VM on success, NULL on failure.
1543 * @param hGVM The global VM handle. Asserts on bad handle.
1544 */
1545GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1546{
1547 PGVM pGVM = GVMMR0ByHandle(hGVM);
1548 return pGVM ? pGVM->pVM : NULL;
1549}
1550
1551
1552/**
1553 * Looks up the VM belonging to the specified EMT thread.
1554 *
1555 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1556 * unnecessary kernel panics when the EMT thread hits an assertion. The
1557 * caller may or may not be an EMT thread.
1558 *
1559 * @returns Pointer to the VM on success, NULL on failure.
1560 * @param hEMT The native thread handle of the EMT.
1561 * NIL_RTNATIVETHREAD means the current thread.
1562 */
1563GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1564{
1565 /*
1566 * No assertions here as we're usually called in an AssertMsgN or
1567 * RTAssert* context.
1568 */
1569 PGVMM pGVMM = g_pGVMM;
1570 if ( !VALID_PTR(pGVMM)
1571 || pGVMM->u32Magic != GVMM_MAGIC)
1572 return NULL;
1573
1574 if (hEMT == NIL_RTNATIVETHREAD)
1575 hEMT = RTThreadNativeSelf();
1576 RTPROCESS ProcId = RTProcSelf();
1577
1578 /*
1579 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1580 */
1581 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1582 {
1583 if ( pGVMM->aHandles[i].iSelf == i
1584 && pGVMM->aHandles[i].ProcId == ProcId
1585 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1586 && VALID_PTR(pGVMM->aHandles[i].pVM)
1587 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1588 {
1589 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1590 return pGVMM->aHandles[i].pVM;
1591
1592 /* This is fairly safe with the current process-per-VM approach. */
1593 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1594 VMCPUID const cCpus = pGVM->cCpus;
1595 if ( cCpus < 1
1596 || cCpus > VMM_MAX_CPU_COUNT)
1597 continue;
1598 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1599 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1600 return pGVMM->aHandles[i].pVM;
1601 }
1602 }
1603 return NULL;
1604}
1605
1606
1607/**
1608 * This will wake up expired and soon-to-be-expired VMs.
1609 *
1610 * @returns Number of VMs that have been woken up.
1611 * @param pGVMM Pointer to the GVMM instance data.
1612 * @param u64Now The current time.
1613 */
1614static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1615{
1616 /*
1617 * Skip this if we've been disabled because of high resolution wakeups or by
1618 * the user.
1619 */
1620 if ( !pGVMM->nsEarlyWakeUp1
1621 && !pGVMM->nsEarlyWakeUp2)
1622 return 0;
1623
1624/** @todo Rewrite this algorithm. See performance defect XYZ. */
1625
1626 /*
1627 * A cheap optimization to stop wasting so much time here on big setups.
1628 */
1629 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1630 if ( pGVMM->cHaltedEMTs == 0
1631 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1632 return 0;
1633
1634 /*
1635 * The first pass will wake up VMs which have actually expired
1636 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1637 */
1638 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1639 uint64_t u64Min = UINT64_MAX;
1640 unsigned cWoken = 0;
1641 unsigned cHalted = 0;
1642 unsigned cTodo2nd = 0;
1643 unsigned cTodo3rd = 0;
1644 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1645 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1646 i = pGVMM->aHandles[i].iNext)
1647 {
1648 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1649 if ( VALID_PTR(pCurGVM)
1650 && pCurGVM->u32Magic == GVM_MAGIC)
1651 {
1652 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1653 {
1654 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1655 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1656 if (u64)
1657 {
1658 if (u64 <= u64Now)
1659 {
1660 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1661 {
1662 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1663 AssertRC(rc);
1664 cWoken++;
1665 }
1666 }
1667 else
1668 {
1669 cHalted++;
1670 if (u64 <= uNsEarlyWakeUp1)
1671 cTodo2nd++;
1672 else if (u64 <= uNsEarlyWakeUp2)
1673 cTodo3rd++;
1674 else if (u64 < u64Min)
1675 u64Min = u64;
1676 }
1677 }
1678 }
1679 }
1680 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1681 }
1682
1683 if (cTodo2nd)
1684 {
1685 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1686 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1687 i = pGVMM->aHandles[i].iNext)
1688 {
1689 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1690 if ( VALID_PTR(pCurGVM)
1691 && pCurGVM->u32Magic == GVM_MAGIC)
1692 {
1693 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1694 {
1695 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1696 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1697 if ( u64
1698 && u64 <= uNsEarlyWakeUp1)
1699 {
1700 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1701 {
1702 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1703 AssertRC(rc);
1704 cWoken++;
1705 }
1706 }
1707 }
1708 }
1709 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1710 }
1711 }
1712
1713 if (cTodo3rd)
1714 {
1715 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1716 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1717 i = pGVMM->aHandles[i].iNext)
1718 {
1719 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1720 if ( VALID_PTR(pCurGVM)
1721 && pCurGVM->u32Magic == GVM_MAGIC)
1722 {
1723 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1724 {
1725 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1726 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1727 if ( u64
1728 && u64 <= uNsEarlyWakeUp2)
1729 {
1730 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1731 {
1732 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1733 AssertRC(rc);
1734 cWoken++;
1735 }
1736 }
1737 }
1738 }
1739 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1740 }
1741 }
1742
1743 /*
1744 * Set the minimum value.
1745 */
1746 pGVMM->uNsNextEmtWakeup = u64Min;
1747
1748 return cWoken;
1749}
1750
1751
1752/**
1753 * Halt the EMT thread.
1754 *
1755 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1756 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1757 * @param pVM Pointer to the VM.
1758 * @param idCpu The Virtual CPU ID of the calling EMT.
1759 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1760 * @thread EMT(idCpu).
1761 */
1762GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1763{
1764 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1765
1766 /*
1767 * Validate the VM structure, state and handle.
1768 */
1769 PGVM pGVM;
1770 PGVMM pGVMM;
1771 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1772 if (RT_FAILURE(rc))
1773 return rc;
1774 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1775
1776 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1777 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1778
1779 /*
1780 * Take the UsedList semaphore, get the current time
1781 * and check if anyone needs waking up.
1782 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1783 */
1784 rc = gvmmR0UsedLock(pGVMM);
1785 AssertRC(rc);
1786
1787 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1788
1789 /* GIP hack: We may frequently be sleeping for short intervals where the
1790 difference between GIP and system time matters on systems with high resolution
1791 system time. So, convert the input from GIP to System time in that case. */
1792 Assert(ASMGetFlags() & X86_EFL_IF);
1793 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1794 const uint64_t u64NowGip = RTTimeNanoTS();
1795 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1796
1797 /*
1798 * Go to sleep if we must...
1799 * Cap the sleep time to 1 second to be on the safe side.
1800 */
1801 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1802 if ( u64NowGip < u64ExpireGipTime
1803 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1804 ? pGVMM->nsMinSleepCompany
1805 : pGVMM->nsMinSleepAlone))
1806 {
1807 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1808 if (cNsInterval > RT_NS_1SEC)
1809 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1810 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1811 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1812 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1813 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1814 gvmmR0UsedUnlock(pGVMM);
1815
1816 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1817 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1818 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1819
1820 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1821 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1822
1823    /* Reset the semaphore to try to prevent a few false wake-ups. */
1824 if (rc == VINF_SUCCESS)
1825 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1826 else if (rc == VERR_TIMEOUT)
1827 {
1828 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1829 rc = VINF_SUCCESS;
1830 }
1831 }
1832 else
1833 {
1834 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1835 gvmmR0UsedUnlock(pGVMM);
1836 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1837 }
1838
1839 return rc;
1840}
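/*
 * Illustrative sketch of a hypothetical caller (the helper name and the one
 * millisecond interval are made up for illustration): the calling EMT passes
 * an absolute GIP time, i.e. the current GIP time plus the desired interval,
 * and GVMMR0SchedHalt caps the actual sleep at one second.
 */
#if 0 /* illustrative sketch only */
static int gvmmR0ExampleHaltOneMs(PVM pVM, VMCPUID idCpu)
{
    /* Must be called on EMT(idCpu); the expiry is absolute GIP time in nanoseconds. */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS;
    return GVMMR0SchedHalt(pVM, idCpu, u64ExpireGipTime);
}
#endif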
1841
1842
1843/**
1844 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1845 * a sleeping EMT.
1846 *
1847 * @retval VINF_SUCCESS if successfully woken up.
1848 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1849 *
1850 * @param pGVM The global (ring-0) VM structure.
1851 * @param pGVCpu The global (ring-0) VCPU structure.
1852 */
1853DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1854{
1855 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1856
1857 /*
1858     * Signal the semaphore regardless of whether it's currently blocked on it.
1859     *
1860     * The reason for this is that there is absolutely no way we can be 100%
1861     * certain that it isn't *about* to go to sleep on it and just got delayed
1862     * a bit en route. So, we will always signal the semaphore when it is
1863     * flagged as halted in the VMM.
1864 */
1865/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1866 int rc;
1867 if (pGVCpu->gvmm.s.u64HaltExpire)
1868 {
1869 rc = VINF_SUCCESS;
1870 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1871 }
1872 else
1873 {
1874 rc = VINF_GVM_NOT_BLOCKED;
1875 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1876 }
1877
1878 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1879 AssertRC(rc2);
1880
1881 return rc;
1882}
1883
1884
1885/**
1886 * Wakes up the halted EMT thread so it can service a pending request.
1887 *
1888 * @returns VBox status code.
1889 * @retval VINF_SUCCESS if successfully woken up.
1890 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1891 *
1892 * @param pVM Pointer to the VM.
1893 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1894 * @param fTakeUsedLock Whether to take the used lock.
1895 * @thread Any but EMT.
1896 */
1897GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1898{
1899 /*
1900 * Validate input and take the UsedLock.
1901 */
1902 PGVM pGVM;
1903 PGVMM pGVMM;
1904 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1905 if (RT_SUCCESS(rc))
1906 {
1907 if (idCpu < pGVM->cCpus)
1908 {
1909 /*
1910 * Do the actual job.
1911 */
1912 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1913
1914 if (fTakeUsedLock)
1915 {
1916 /*
1917 * While we're here, do a round of scheduling.
1918 */
1919 Assert(ASMGetFlags() & X86_EFL_IF);
1920 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1921 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1922 }
1923 }
1924 else
1925 rc = VERR_INVALID_CPU_ID;
1926
1927 if (fTakeUsedLock)
1928 {
1929 int rc2 = gvmmR0UsedUnlock(pGVMM);
1930 AssertRC(rc2);
1931 }
1932 }
1933
1934    LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
1935 return rc;
1936}
1937
1938
1939/**
1940 * Wakes up the halted EMT thread so it can service a pending request.
1941 *
1942 * @returns VBox status code.
1943 * @retval VINF_SUCCESS if successfully woken up.
1944 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1945 *
1946 * @param pVM Pointer to the VM.
1947 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1948 * @thread Any but EMT.
1949 */
1950GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1951{
1952 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1953}
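/*
 * Illustrative sketch of a hypothetical caller: a non-EMT thread waking every
 * EMT of a VM so they notice pending requests. VINF_GVM_NOT_BLOCKED is also a
 * success status; it just means the target EMT was not halted.
 */
#if 0 /* illustrative sketch only */
static void gvmmR0ExampleWakeUpAllEmts(PVM pVM)
{
    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
    {
        int rc = GVMMR0SchedWakeUp(pVM, idCpu);
        AssertMsg(RT_SUCCESS(rc), ("idCpu=%u rc=%Rrc\n", idCpu, rc));
        NOREF(rc);
    }
}
#endif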
1954
1955/**
1956 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1957 * the Virtual CPU if it's still busy executing guest code.
1958 *
1959 * @returns VBox status code.
1960 * @retval VINF_SUCCESS if poked successfully.
1961 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1962 *
1963 * @param pGVM The global (ring-0) VM structure.
1964 * @param pVCpu Pointer to the VMCPU.
1965 */
1966DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1967{
1968 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1969
1970 RTCPUID idHostCpu = pVCpu->idHostCpu;
1971 if ( idHostCpu == NIL_RTCPUID
1972 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1973 {
1974 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1975 return VINF_GVM_NOT_BUSY_IN_GC;
1976 }
1977
1978 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1979 RTMpPokeCpu(idHostCpu);
1980 return VINF_SUCCESS;
1981}
1982
1983/**
1984 * Pokes an EMT if it's still busy running guest code.
1985 *
1986 * @returns VBox status code.
1987 * @retval VINF_SUCCESS if poked successfully.
1988 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1989 *
1990 * @param pVM Pointer to the VM.
1991 * @param idCpu The ID of the virtual CPU to poke.
1992 * @param fTakeUsedLock Whether to take the used lock.
1993 */
1994GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1995{
1996 /*
1997 * Validate input and take the UsedLock.
1998 */
1999 PGVM pGVM;
2000 PGVMM pGVMM;
2001 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2002 if (RT_SUCCESS(rc))
2003 {
2004 if (idCpu < pGVM->cCpus)
2005 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2006 else
2007 rc = VERR_INVALID_CPU_ID;
2008
2009 if (fTakeUsedLock)
2010 {
2011 int rc2 = gvmmR0UsedUnlock(pGVMM);
2012 AssertRC(rc2);
2013 }
2014 }
2015
2016    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2017 return rc;
2018}
2019
2020
2021/**
2022 * Pokes an EMT if it's still busy running guest code.
2023 *
2024 * @returns VBox status code.
2025 * @retval VINF_SUCCESS if poked successfully.
2026 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2027 *
2028 * @param pVM Pointer to the VM.
2029 * @param idCpu The ID of the virtual CPU to poke.
2030 */
2031GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2032{
2033 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2034}
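/*
 * Illustrative sketch of a hypothetical caller: poking a VCPU that may still
 * be executing guest code, typically after raising a request or force-action
 * flag, so it exits to ring-0 and notices it.
 */
#if 0 /* illustrative sketch only */
static void gvmmR0ExamplePokeEmt(PVM pVM, VMCPUID idCpu)
{
    /* VINF_GVM_NOT_BUSY_IN_GC simply means the VCPU was not executing guest code. */
    int rc = GVMMR0SchedPoke(pVM, idCpu);
    NOREF(rc);
}
#endif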
2035
2036
2037/**
2038 * Wakes up a set of halted EMT threads so they can service pending requests.
2039 *
2040 * @returns VBox status code, no informational stuff.
2041 *
2042 * @param pVM Pointer to the VM.
2043 * @param pSleepSet The set of sleepers to wake up.
2044 * @param pPokeSet The set of CPUs to poke.
2045 */
2046GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2047{
2048 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2049 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2050 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2051
2052 /*
2053 * Validate input and take the UsedLock.
2054 */
2055 PGVM pGVM;
2056 PGVMM pGVMM;
2057 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2058 if (RT_SUCCESS(rc))
2059 {
2060 rc = VINF_SUCCESS;
2061 VMCPUID idCpu = pGVM->cCpus;
2062 while (idCpu-- > 0)
2063 {
2064            /* Don't try to poke or wake up ourselves. */
2065 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2066 continue;
2067
2068 /* just ignore errors for now. */
2069 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2070 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2071 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2072 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2073 }
2074
2075 int rc2 = gvmmR0UsedUnlock(pGVMM);
2076 AssertRC(rc2);
2077 }
2078
2079 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2080 return rc;
2081}
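/*
 * Illustrative sketch of a hypothetical caller: building the sleep and poke
 * sets before calling GVMMR0SchedWakeUpAndPokeCpus. The VMCPUSET_EMPTY and
 * VMCPUSET_ADD helpers from VBox/vmm/vmcpuset.h are assumed here. Note that
 * the API checks the sleep set first, so a CPU present in both sets is only
 * woken, never poked.
 */
#if 0 /* illustrative sketch only */
static int gvmmR0ExampleKickAllEmts(PVM pVM)
{
    VMCPUSET SleepSet;
    VMCPUSET PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
    {
        if (VMCPU_GET_STATE(&pVM->aCpus[idCpu]) == VMCPUSTATE_STARTED_EXEC)
            VMCPUSET_ADD(&PokeSet, idCpu);      /* busy in guest context -> poke it */
        else
            VMCPUSET_ADD(&SleepSet, idCpu);     /* possibly halted -> wake it */
    }
    return GVMMR0SchedWakeUpAndPokeCpus(pVM, &SleepSet, &PokeSet);
}
#endif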
2082
2083
2084/**
2085 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2086 *
2087 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2088 * @param pVM Pointer to the VM.
2089 * @param pReq Pointer to the request packet.
2090 */
2091GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2092{
2093 /*
2094 * Validate input and pass it on.
2095 */
2096 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2097 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2098
2099 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2100}
2101
2102
2103
2104/**
2105 * Poll the schedule to see if someone else should get a chance to run.
2106 *
2107 * This is a bit hackish and will not work too well if the machine is
2108 * under heavy load from non-VM processes.
2109 *
2110 * @returns VINF_SUCCESS if not yielded.
2111 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2112 * @param pVM Pointer to the VM.
2113 * @param idCpu The Virtual CPU ID of the calling EMT.
2115 * @param fYield Whether to yield or not.
2116 * This is for when we're spinning in the halt loop.
2117 * @thread EMT(idCpu).
2118 */
2119GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2120{
2121 /*
2122 * Validate input.
2123 */
2124 PGVM pGVM;
2125 PGVMM pGVMM;
2126 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2127 if (RT_SUCCESS(rc))
2128 {
2129 rc = gvmmR0UsedLock(pGVMM);
2130 AssertRC(rc);
2131 pGVM->gvmm.s.StatsSched.cPollCalls++;
2132
2133 Assert(ASMGetFlags() & X86_EFL_IF);
2134 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2135
2136 if (!fYield)
2137 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2138 else
2139 {
2140 /** @todo implement this... */
2141 rc = VERR_NOT_IMPLEMENTED;
2142 }
2143
2144 gvmmR0UsedUnlock(pGVMM);
2145 }
2146
2147    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2148 return rc;
2149}
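/*
 * Illustrative sketch of a hypothetical caller: an EMT spinning in its halt
 * loop can poll the scheduler so that other EMTs whose halt expiry has passed
 * get woken up; fYield=true is currently not implemented and would return
 * VERR_NOT_IMPLEMENTED.
 */
#if 0 /* illustrative sketch only */
static void gvmmR0ExamplePollFromHaltLoop(PVM pVM, VMCPUID idCpu)
{
    /* Must be called on EMT(idCpu) with interrupts enabled (GIP time is read). */
    int rc = GVMMR0SchedPoll(pVM, idCpu, false /* fYield */);
    AssertRC(rc);
}
#endif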
2150
2151
2152#ifdef GVMM_SCHED_WITH_PPT
2153/**
2154 * Timer callback for the periodic preemption timer.
2155 *
2156 * @param pTimer The timer handle.
2157 * @param pvUser Pointer to the per cpu structure.
2158 * @param iTick The current tick.
2159 */
2160static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2161{
2162 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2163 NOREF(pTimer); NOREF(iTick);
2164
2165 /*
2166 * Termination check
2167 */
2168 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2169 return;
2170
2171 /*
2172     * Do the housekeeping.
2173 */
2174 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2175
2176 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2177 {
2178 /*
2179 * Historicize the max frequency.
2180 */
2181 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2182 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2183 pCpu->Ppt.iTickHistorization = 0;
2184 pCpu->Ppt.uDesiredHz = 0;
2185
2186 /*
2187         * Check whether the current timer frequency still matches the history maximum.
2188 */
2189 uint32_t uHistMaxHz = 0;
2190 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2191 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2192 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2193 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2194 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2195 else if (uHistMaxHz)
2196 {
2197 /*
2198 * Reprogram it.
2199 */
2200 pCpu->Ppt.cChanges++;
2201 pCpu->Ppt.iTickHistorization = 0;
2202 pCpu->Ppt.uTimerHz = uHistMaxHz;
2203 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2204 pCpu->Ppt.cNsInterval = cNsInterval;
2205 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2206 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2207 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2208 / cNsInterval;
2209 else
2210 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2211 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2212
2213 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2214 RTTimerChangeInterval(pTimer, cNsInterval);
2215 }
2216 else
2217 {
2218 /*
2219 * Stop it.
2220 */
2221 pCpu->Ppt.fStarted = false;
2222 pCpu->Ppt.uTimerHz = 0;
2223 pCpu->Ppt.cNsInterval = 0;
2224 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2225
2226 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2227 RTTimerStop(pTimer);
2228 }
2229 }
2230 else
2231 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2232}
2233#endif /* GVMM_SCHED_WITH_PPT */
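/*
 * Illustrative sketch (a stand-alone restatement of the computation used in the
 * timer callback above, not part of the GVMM code): the number of timer ticks
 * between historizations corresponds to roughly 1.5 history intervals. Purely
 * for illustration, assuming a 20 ms history interval and a 1 ms timer period,
 * this yields (20000000 + 10000000 - 1) / 1000000 = 29 ticks.
 */
#if 0 /* illustrative sketch only */
static uint32_t gvmmR0ExampleCalcHistorizationTicks(uint32_t cNsTimerInterval)
{
    if (cNsTimerInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
        return (GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) / cNsTimerInterval;
    return 1;
}
#endif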
2234
2235
2236/**
2237 * Updates the periodic preemption timer for the calling CPU.
2238 *
2239 * The caller must have disabled preemption!
2240 * The caller must check that the host can do high resolution timers.
2241 *
2242 * @param pVM Pointer to the VM.
2243 * @param idHostCpu The current host CPU id.
2244 * @param uHz The desired frequency.
2245 */
2246GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2247{
2248 NOREF(pVM);
2249#ifdef GVMM_SCHED_WITH_PPT
2250 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2251 Assert(RTTimerCanDoHighResolution());
2252
2253 /*
2254 * Resolve the per CPU data.
2255 */
2256 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2257 PGVMM pGVMM = g_pGVMM;
2258 if ( !VALID_PTR(pGVMM)
2259 || pGVMM->u32Magic != GVMM_MAGIC)
2260 return;
2261 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2262 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2263 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2264 && pCpu->idCpu == idHostCpu,
2265                         ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2266
2267 /*
2268 * Check whether we need to do anything about the timer.
2269     * We have to be a little bit careful since we might be racing the timer
2270 * callback here.
2271 */
2272 if (uHz > 16384)
2273 uHz = 16384; /** @todo add a query method for this! */
2274 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2275 && uHz >= pCpu->Ppt.uMinHz
2276 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2277 {
2278 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2279
2280 pCpu->Ppt.uDesiredHz = uHz;
2281 uint32_t cNsInterval = 0;
2282 if (!pCpu->Ppt.fStarted)
2283 {
2284 pCpu->Ppt.cStarts++;
2285 pCpu->Ppt.fStarted = true;
2286 pCpu->Ppt.fStarting = true;
2287 pCpu->Ppt.iTickHistorization = 0;
2288 pCpu->Ppt.uTimerHz = uHz;
2289 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2290 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2291 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2292 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2293 / cNsInterval;
2294 else
2295 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2296 }
2297
2298 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2299
2300 if (cNsInterval)
2301 {
2302 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2303 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2304 AssertRC(rc);
2305
2306 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2307 if (RT_FAILURE(rc))
2308 pCpu->Ppt.fStarted = false;
2309 pCpu->Ppt.fStarting = false;
2310 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2311 }
2312 }
2313#else /* !GVMM_SCHED_WITH_PPT */
2314 NOREF(idHostCpu); NOREF(uHz);
2315#endif /* !GVMM_SCHED_WITH_PPT */
2316}
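/*
 * Illustrative sketch of a hypothetical caller: the update must be done with
 * preemption disabled and only when the host supports high resolution timers,
 * matching the assertions at the top of the function above.
 */
#if 0 /* illustrative sketch only */
static void gvmmR0ExampleUpdatePpt(PVM pVM, uint32_t uHz)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);
    if (RTTimerCanDoHighResolution())
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHz);
    RTThreadPreemptRestore(&PreemptState);
}
#endif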
2317
2318
2319/**
2320 * Retrieves the GVMM statistics visible to the caller.
2321 *
2322 * @returns VBox status code.
2323 *
2324 * @param pStats Where to put the statistics.
2325 * @param pSession The current session.
2326 * @param pVM The VM to obtain statistics for. Optional.
2327 */
2328GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2329{
2330 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2331
2332 /*
2333 * Validate input.
2334 */
2335 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2336 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2337 pStats->cVMs = 0; /* (crash before taking the sem...) */
2338
2339 /*
2340 * Take the lock and get the VM statistics.
2341 */
2342 PGVMM pGVMM;
2343 if (pVM)
2344 {
2345 PGVM pGVM;
2346 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2347 if (RT_FAILURE(rc))
2348 return rc;
2349 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2350 }
2351 else
2352 {
2353 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2354 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2355
2356 int rc = gvmmR0UsedLock(pGVMM);
2357 AssertRCReturn(rc, rc);
2358 }
2359
2360 /*
2361 * Enumerate the VMs and add the ones visible to the statistics.
2362 */
2363 pStats->cVMs = 0;
2364 pStats->cEMTs = 0;
2365 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2366
2367 for (unsigned i = pGVMM->iUsedHead;
2368 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2369 i = pGVMM->aHandles[i].iNext)
2370 {
2371 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2372 void *pvObj = pGVMM->aHandles[i].pvObj;
2373 if ( VALID_PTR(pvObj)
2374 && VALID_PTR(pGVM)
2375 && pGVM->u32Magic == GVM_MAGIC
2376 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2377 {
2378 pStats->cVMs++;
2379 pStats->cEMTs += pGVM->cCpus;
2380
2381 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2382 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2383 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2384 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2385 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2386
2387 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2388 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2389 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2390
2391 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2392 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2393
2394 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2395 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2396 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2397 }
2398 }
2399
2400 /*
2401 * Copy out the per host CPU statistics.
2402 */
2403 uint32_t iDstCpu = 0;
2404 uint32_t cSrcCpus = pGVMM->cHostCpus;
2405 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2406 {
2407 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2408 {
2409 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2410 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2411#ifdef GVMM_SCHED_WITH_PPT
2412 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2413 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2414 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2415 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2416#else
2417 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2418 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2419 pStats->aHostCpus[iDstCpu].cChanges = 0;
2420 pStats->aHostCpus[iDstCpu].cStarts = 0;
2421#endif
2422 iDstCpu++;
2423 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2424 break;
2425 }
2426 }
2427 pStats->cHostCpus = iDstCpu;
2428
2429 gvmmR0UsedUnlock(pGVMM);
2430
2431 return VINF_SUCCESS;
2432}
2433
2434
2435/**
2436 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2437 *
2438 * @returns see GVMMR0QueryStatistics.
2439 * @param pVM Pointer to the VM. Optional.
2440 * @param pReq Pointer to the request packet.
2441 */
2442GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2443{
2444 /*
2445 * Validate input and pass it on.
2446 */
2447 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2448 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2449
2450 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2451}
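/*
 * Illustrative sketch of a hypothetical ring-0 caller: querying the statistics
 * for one VM and logging its halt counters. GVMMSTATS embeds per host CPU
 * entries and is fairly large, so it is heap allocated here rather than placed
 * on the ring-0 stack.
 */
#if 0 /* illustrative sketch only */
static int gvmmR0ExampleLogHaltStats(PVM pVM, PSUPDRVSESSION pSession)
{
    PGVMMSTATS pStats = (PGVMMSTATS)RTMemAllocZ(sizeof(*pStats));
    if (!pStats)
        return VERR_NO_MEMORY;
    int rc = GVMMR0QueryStatistics(pStats, pSession, pVM);
    if (RT_SUCCESS(rc))
        LogRel(("GVMM halt stats: calls=%RU64 blocking=%RU64 timeouts=%RU64\n",
                pStats->SchedVM.cHaltCalls, pStats->SchedVM.cHaltBlocking, pStats->SchedVM.cHaltTimeouts));
    RTMemFree(pStats);
    return rc;
}
#endif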
2452
2453
2454/**
2455 * Resets the specified GVMM statistics.
2456 *
2457 * @returns VBox status code.
2458 *
2459 * @param pStats Which statistics to reset; non-zero fields indicate which ones to reset.
2460 * @param pSession The current session.
2461 * @param pVM The VM to reset statistics for. Optional.
2462 */
2463GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2464{
2465 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2466
2467 /*
2468 * Validate input.
2469 */
2470 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2471 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2472
2473 /*
2474 * Take the lock and get the VM statistics.
2475 */
2476 PGVMM pGVMM;
2477 if (pVM)
2478 {
2479 PGVM pGVM;
2480 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2481 if (RT_FAILURE(rc))
2482 return rc;
2483# define MAYBE_RESET_FIELD(field) \
2484 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2485 MAYBE_RESET_FIELD(cHaltCalls);
2486 MAYBE_RESET_FIELD(cHaltBlocking);
2487 MAYBE_RESET_FIELD(cHaltTimeouts);
2488 MAYBE_RESET_FIELD(cHaltNotBlocking);
2489 MAYBE_RESET_FIELD(cHaltWakeUps);
2490 MAYBE_RESET_FIELD(cWakeUpCalls);
2491 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2492 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2493 MAYBE_RESET_FIELD(cPokeCalls);
2494 MAYBE_RESET_FIELD(cPokeNotBusy);
2495 MAYBE_RESET_FIELD(cPollCalls);
2496 MAYBE_RESET_FIELD(cPollHalts);
2497 MAYBE_RESET_FIELD(cPollWakeUps);
2498# undef MAYBE_RESET_FIELD
2499 }
2500 else
2501 {
2502 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2503
2504 int rc = gvmmR0UsedLock(pGVMM);
2505 AssertRCReturn(rc, rc);
2506 }
2507
2508 /*
2509     * Enumerate the VMs and reset the requested counters of the ones visible to the caller.
2510 */
2511 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2512 {
2513 for (unsigned i = pGVMM->iUsedHead;
2514 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2515 i = pGVMM->aHandles[i].iNext)
2516 {
2517 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2518 void *pvObj = pGVMM->aHandles[i].pvObj;
2519 if ( VALID_PTR(pvObj)
2520 && VALID_PTR(pGVM)
2521 && pGVM->u32Magic == GVM_MAGIC
2522 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2523 {
2524# define MAYBE_RESET_FIELD(field) \
2525 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2526 MAYBE_RESET_FIELD(cHaltCalls);
2527 MAYBE_RESET_FIELD(cHaltBlocking);
2528 MAYBE_RESET_FIELD(cHaltTimeouts);
2529 MAYBE_RESET_FIELD(cHaltNotBlocking);
2530 MAYBE_RESET_FIELD(cHaltWakeUps);
2531 MAYBE_RESET_FIELD(cWakeUpCalls);
2532 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2533 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2534 MAYBE_RESET_FIELD(cPokeCalls);
2535 MAYBE_RESET_FIELD(cPokeNotBusy);
2536 MAYBE_RESET_FIELD(cPollCalls);
2537 MAYBE_RESET_FIELD(cPollHalts);
2538 MAYBE_RESET_FIELD(cPollWakeUps);
2539# undef MAYBE_RESET_FIELD
2540 }
2541 }
2542 }
2543
2544 gvmmR0UsedUnlock(pGVMM);
2545
2546 return VINF_SUCCESS;
2547}
2548
2549
2550/**
2551 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2552 *
2553 * @returns see GVMMR0ResetStatistics.
2554 * @param pVM Pointer to the VM. Optional.
2555 * @param pReq Pointer to the request packet.
2556 */
2557GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2558{
2559 /*
2560 * Validate input and pass it on.
2561 */
2562 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2563 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2564
2565 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2566}
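/*
 * Illustrative sketch of a hypothetical ring-0 caller: resetting only the halt
 * related counters of one VM. Per the contract above, a non-zero field in the
 * passed-in structure marks the corresponding counter for reset.
 */
#if 0 /* illustrative sketch only */
static int gvmmR0ExampleResetHaltStats(PVM pVM, PSUPDRVSESSION pSession)
{
    PGVMMSTATS pStats = (PGVMMSTATS)RTMemAllocZ(sizeof(*pStats));
    if (!pStats)
        return VERR_NO_MEMORY;
    pStats->SchedVM.cHaltCalls       = 1;   /* flag for reset */
    pStats->SchedVM.cHaltBlocking    = 1;
    pStats->SchedVM.cHaltTimeouts    = 1;
    pStats->SchedVM.cHaltNotBlocking = 1;
    pStats->SchedVM.cHaltWakeUps     = 1;
    int rc = GVMMR0ResetStatistics(pStats, pSession, pVM);
    RTMemFree(pStats);
    return rc;
}
#endif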
2567