VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@95248

Last change on this file since 95248 was 94943, checked in by vboxsync, 3 years ago

VMM/CPUM: Copy the ring-0 host CPUMFEATURES structure into the VM structure upon creation and make ring-3 use it when present (not necessarily present if in NEM or IEM mode). bugref:10093

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 122.1 KB
1/* $Id: GVMMR0.cpp 94943 2022-05-09 09:39:04Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
100/** Define this to enable the per-EMT high resolution wakeup timers. */
101# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
102#endif
103
104
105/** Special value that GVMMR0DeregisterVCpu sets. */
106#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
107AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
108
109
110/*********************************************************************************************************************************
111* Structures and Typedefs *
112*********************************************************************************************************************************/
113
114/**
115 * Global VM handle.
116 */
117typedef struct GVMHANDLE
118{
119 /** The index of the next handle in the list (free or used). (0 is nil.) */
120 uint16_t volatile iNext;
121 /** Our own index / handle value. */
122 uint16_t iSelf;
123 /** The process ID of the handle owner.
124 * This is used for access checks. */
125 RTPROCESS ProcId;
126 /** The pointer to the ring-0 only (aka global) VM structure. */
127 PGVM pGVM;
128 /** The virtual machine object. */
129 void *pvObj;
130 /** The session this VM is associated with. */
131 PSUPDRVSESSION pSession;
132 /** The ring-0 handle of the EMT0 thread.
133 * This is used for ownership checks as well as looking up a VM handle by thread
134 * at times like assertions. */
135 RTNATIVETHREAD hEMT0;
136} GVMHANDLE;
137/** Pointer to a global VM handle. */
138typedef GVMHANDLE *PGVMHANDLE;
139
140/** Number of GVM handles (including the NIL handle). */
141#if HC_ARCH_BITS == 64
142# define GVMM_MAX_HANDLES 8192
143#else
144# define GVMM_MAX_HANDLES 128
145#endif
146
147/**
148 * Per host CPU GVMM data.
149 */
150typedef struct GVMMHOSTCPU
151{
152 /** Magic number (GVMMHOSTCPU_MAGIC). */
153 uint32_t volatile u32Magic;
154 /** The CPU ID. */
155 RTCPUID idCpu;
156 /** The CPU set index. */
157 uint32_t idxCpuSet;
158
159#ifdef GVMM_SCHED_WITH_PPT
160 /** Periodic preemption timer data. */
161 struct
162 {
163 /** The handle to the periodic preemption timer. */
164 PRTTIMER pTimer;
165 /** Spinlock protecting the data below. */
166 RTSPINLOCK hSpinlock;
167 /** The smallest Hz that we need to care about. (static) */
168 uint32_t uMinHz;
169 /** The number of ticks between each historization. */
170 uint32_t cTicksHistoriziationInterval;
171 /** The current historization tick (counting up to
172 * cTicksHistoriziationInterval and then resetting). */
173 uint32_t iTickHistorization;
174 /** The current timer interval. This is set to 0 when inactive. */
175 uint32_t cNsInterval;
176 /** The current timer frequency. This is set to 0 when inactive. */
177 uint32_t uTimerHz;
178 /** The current max frequency reported by the EMTs.
179 * This gets historicized and reset by the timer callback. This is
180 * read without holding the spinlock, so needs atomic updating. */
181 uint32_t volatile uDesiredHz;
182 /** Whether the timer was started or not. */
183 bool volatile fStarted;
184 /** Set if we're starting timer. */
185 bool volatile fStarting;
186 /** The index of the next history entry (mod it). */
187 uint32_t iHzHistory;
188 /** Historicized uDesiredHz values. The array wraps around, new entries
189 * are added at iHzHistory. This is updated approximately every
190 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
191 uint32_t aHzHistory[8];
192 /** Statistics counter for recording the number of interval changes. */
193 uint32_t cChanges;
194 /** Statistics counter for recording the number of timer starts. */
195 uint32_t cStarts;
196 } Ppt;
197#endif /* GVMM_SCHED_WITH_PPT */
198
199} GVMMHOSTCPU;
200/** Pointer to the per host CPU GVMM data. */
201typedef GVMMHOSTCPU *PGVMMHOSTCPU;
202/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
203#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
204/** The interval each history entry should cover (approximately), given in
205 * nanoseconds. */
206#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
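/*
 * Editor's note: a minimal sketch (not code from this file) of how the PPT
 * frequency described in the @page docs above falls out of this history:
 * with 8 entries historized every GVMMHOSTCPU_PPT_HIST_INTERVAL_NS (20 ms),
 * the window covers 8 * 20 ms = ~160 ms.
 */
#if 0 /* illustration only */
static uint32_t gvmmR0SketchCalcPptHz(PGVMMHOSTCPU pCpu)
{
    /* Start with the latest max frequency reported by the EMTs... */
    uint32_t uHz = pCpu->Ppt.uDesiredHz;
    /* ...and raise it to the highest value seen in the last ~160 ms. */
    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        if (pCpu->Ppt.aHzHistory[i] > uHz)
            uHz = pCpu->Ppt.aHzHistory[i];
    return uHz;
}
#endif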
207
208
209/**
210 * The GVMM instance data.
211 */
212typedef struct GVMM
213{
214 /** Eyecatcher / magic. */
215 uint32_t u32Magic;
216 /** The index of the head of the free handle chain. (0 is nil.) */
217 uint16_t volatile iFreeHead;
218 /** The index of the head of the active handle chain. (0 is nil.) */
219 uint16_t volatile iUsedHead;
220 /** The number of VMs. */
221 uint16_t volatile cVMs;
222 /** Alignment padding. */
223 uint16_t u16Reserved;
224 /** The number of EMTs. */
225 uint32_t volatile cEMTs;
226 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
227 uint32_t volatile cHaltedEMTs;
228 /** Mini lock for restricting early wake-ups to one thread. */
229 bool volatile fDoingEarlyWakeUps;
230 bool afPadding[3]; /**< explicit alignment padding. */
231 /** When the next halted or sleeping EMT will wake up.
232 * This is set to 0 when it needs recalculating and to UINT64_MAX when
233 * there are no halted or sleeping EMTs in the GVMM. */
234 uint64_t uNsNextEmtWakeup;
235 /** The lock used to serialize VM creation, destruction and associated events that
236 * aren't performance critical. Owners may acquire the list lock. */
237 RTCRITSECT CreateDestroyLock;
238 /** The lock used to serialize used list updates and accesses.
239 * This indirectly includes scheduling since the scheduler will have to walk the
240 * used list to examine running VMs. Owners may not acquire any other locks. */
241 RTCRITSECTRW UsedLock;
242 /** The handle array.
243 * The size of this array defines the maximum number of currently running VMs.
244 * The first entry is unused as it represents the NIL handle. */
245 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
246
247 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
248 * The number of EMTs that means we no longer consider ourselves alone on a
249 * CPU/Core.
250 */
251 uint32_t cEMTsMeansCompany;
252 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
253 * The minimum sleep time for when we're alone, in nanoseconds.
254 */
255 uint32_t nsMinSleepAlone;
256 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
257 * The minimum sleep time for when we've got company, in nanoseconds.
258 */
259 uint32_t nsMinSleepCompany;
260#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
261 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
262 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
263 * nanoseconds.
264 */
265 uint32_t nsMinSleepWithHrTimer;
266#endif
267 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
268 * The limit for the first round of early wake-ups, given in nanoseconds.
269 */
270 uint32_t nsEarlyWakeUp1;
271 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
272 * The limit for the second round of early wake-ups, given in nanoseconds.
273 */
274 uint32_t nsEarlyWakeUp2;
275
276 /** Set if we're doing early wake-ups.
277 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
278 bool volatile fDoEarlyWakeUps;
279
280 /** The number of entries in the host CPU array (aHostCpus). */
281 uint32_t cHostCpus;
282 /** Per host CPU data (variable length). */
283 GVMMHOSTCPU aHostCpus[1];
284} GVMM;
285AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
286AssertCompileMemberAlignment(GVMM, UsedLock, 8);
287AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
288/** Pointer to the GVMM instance data. */
289typedef GVMM *PGVMM;
290
291/** The GVMM::u32Magic value (Charlie Haden). */
292#define GVMM_MAGIC UINT32_C(0x19370806)
293
294
295
296/*********************************************************************************************************************************
297* Global Variables *
298*********************************************************************************************************************************/
299/** Pointer to the GVMM instance data.
300 * (Just my general dislike for global variables.) */
301static PGVMM g_pGVMM = NULL;
302
303/** Macro for obtaining and validating the g_pGVMM pointer.
304 * On failure it will return from the invoking function with the specified return value.
305 *
306 * @param pGVMM The name of the pGVMM variable.
307 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
308 * status codes.
309 */
310#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
311 do { \
312 (pGVMM) = g_pGVMM;\
313 AssertPtrReturn((pGVMM), (rc)); \
314 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
315 } while (0)
316
317/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
318 * On failure it will return from the invoking function.
319 *
320 * @param pGVMM The name of the pGVMM variable.
321 */
322#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
323 do { \
324 (pGVMM) = g_pGVMM;\
325 AssertPtrReturnVoid((pGVMM)); \
326 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
327 } while (0)
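/*
 * Editor's note: a minimal usage sketch (hypothetical function, not part of
 * this file) showing the intended pattern for the two macros above.
 */
#if 0 /* illustration only */
static uint32_t gvmmR0SketchQueryVMCount(void)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, 0); /* returns 0 here if GVMM isn't initialized */
    return pGVMM->cVMs;
}
#endif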
328
329
330/*********************************************************************************************************************************
331* Internal Functions *
332*********************************************************************************************************************************/
333static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
334static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
335static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
336static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
337
338#ifdef GVMM_SCHED_WITH_PPT
339static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
340#endif
341#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
342static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
343#endif
344
345
346/**
347 * Initializes the GVMM.
348 *
349 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
350 *
351 * @returns VBox status code.
352 */
353GVMMR0DECL(int) GVMMR0Init(void)
354{
355 LogFlow(("GVMMR0Init:\n"));
356
357 /*
358 * Allocate and initialize the instance data.
359 */
360 uint32_t cHostCpus = RTMpGetArraySize();
361 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
362
363 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
364 if (!pGVMM)
365 return VERR_NO_MEMORY;
366 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
367 "GVMM-CreateDestroyLock");
368 if (RT_SUCCESS(rc))
369 {
370 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
371 if (RT_SUCCESS(rc))
372 {
373 pGVMM->u32Magic = GVMM_MAGIC;
374 pGVMM->iUsedHead = 0;
375 pGVMM->iFreeHead = 1;
376
377 /* the nil handle */
378 pGVMM->aHandles[0].iSelf = 0;
379 pGVMM->aHandles[0].iNext = 0;
380
381 /* the tail */
382 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
383 pGVMM->aHandles[i].iSelf = i;
384 pGVMM->aHandles[i].iNext = 0; /* nil */
385
386 /* the rest */
387 while (i-- > 1)
388 {
389 pGVMM->aHandles[i].iSelf = i;
390 pGVMM->aHandles[i].iNext = i + 1;
391 }
392
393 /* The default configuration values. */
394 uint32_t cNsResolution = RTSemEventMultiGetResolution();
395 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
396 if (cNsResolution >= 5*RT_NS_100US)
397 {
398 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
399 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
400 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
401 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
402 }
403 else if (cNsResolution > RT_NS_100US)
404 {
405 pGVMM->nsMinSleepAlone = cNsResolution / 2;
406 pGVMM->nsMinSleepCompany = cNsResolution / 4;
407 pGVMM->nsEarlyWakeUp1 = 0;
408 pGVMM->nsEarlyWakeUp2 = 0;
409 }
410 else
411 {
412 pGVMM->nsMinSleepAlone = 2000;
413 pGVMM->nsMinSleepCompany = 2000;
414 pGVMM->nsEarlyWakeUp1 = 0;
415 pGVMM->nsEarlyWakeUp2 = 0;
416 }
417#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
418 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
419#endif
420 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
421
422 /* The host CPU data. */
423 pGVMM->cHostCpus = cHostCpus;
424 uint32_t iCpu = cHostCpus;
425 RTCPUSET PossibleSet;
426 RTMpGetSet(&PossibleSet);
427 while (iCpu-- > 0)
428 {
429 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
430#ifdef GVMM_SCHED_WITH_PPT
431 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
432 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
433 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
434 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
435 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
436 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
437 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
438 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
439 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
440 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
441 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
442 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
443#endif
444
445 if (RTCpuSetIsMember(&PossibleSet, iCpu))
446 {
447 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
448 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
449
450#ifdef GVMM_SCHED_WITH_PPT
451 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
452 50*1000*1000 /* whatever */,
453 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
454 gvmmR0SchedPeriodicPreemptionTimerCallback,
455 &pGVMM->aHostCpus[iCpu]);
456 if (RT_SUCCESS(rc))
457 {
458 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
459 if (RT_FAILURE(rc))
460 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
461 }
462 else
463 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
464 if (RT_FAILURE(rc))
465 {
466 while (iCpu < cHostCpus)
467 {
468 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
469 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
470 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
471 iCpu++;
472 }
473 break;
474 }
475#endif
476 }
477 else
478 {
479 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
480 pGVMM->aHostCpus[iCpu].u32Magic = 0;
481 }
482 }
483 if (RT_SUCCESS(rc))
484 {
485 g_pGVMM = pGVMM;
486 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
487 return VINF_SUCCESS;
488 }
489
490 /* bail out. */
491 RTCritSectRwDelete(&pGVMM->UsedLock);
492 }
493 else
494 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
495 RTCritSectDelete(&pGVMM->CreateDestroyLock);
496 }
497 else
498 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
499
500 RTMemFree(pGVMM);
501 return rc;
502}
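/*
 * Editor's note: after GVMMR0Init the handle array forms a singly linked
 * free list, with entry 0 serving as the nil link:
 *
 *   iFreeHead = 1 -> aHandles[1].iNext = 2 -> ... -> aHandles[GVMM_MAX_HANDLES - 1].iNext = 0
 *
 * GVMMR0CreateVM pops handles off iFreeHead and pushes them onto iUsedHead;
 * gvmmR0HandleObjDestructor later returns them to the free list.
 */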
503
504
505/**
506 * Terminates the GVMM.
507 *
508 * This is called while owning the loader semaphore (see supdrvLdrFree()).
509 * And unless something is wrong, there should be absolutely no VMs
510 * registered at this point.
511 */
512GVMMR0DECL(void) GVMMR0Term(void)
513{
514 LogFlow(("GVMMR0Term:\n"));
515
516 PGVMM pGVMM = g_pGVMM;
517 g_pGVMM = NULL;
518 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
519 {
520 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
521 return;
522 }
523
524 /*
525 * First of all, stop all active timers.
526 */
527 uint32_t cActiveTimers = 0;
528 uint32_t iCpu = pGVMM->cHostCpus;
529 while (iCpu-- > 0)
530 {
531 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
532#ifdef GVMM_SCHED_WITH_PPT
533 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
534 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
535 cActiveTimers++;
536#endif
537 }
538 if (cActiveTimers)
539 RTThreadSleep(1); /* fudge */
540
541 /*
542 * Invalidate the instance data and free the resources.
543 */
544 pGVMM->u32Magic = ~GVMM_MAGIC;
545 RTCritSectRwDelete(&pGVMM->UsedLock);
546 RTCritSectDelete(&pGVMM->CreateDestroyLock);
547
548 pGVMM->iFreeHead = 0;
549 if (pGVMM->iUsedHead)
550 {
551 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
552 pGVMM->iUsedHead = 0;
553 }
554
555#ifdef GVMM_SCHED_WITH_PPT
556 iCpu = pGVMM->cHostCpus;
557 while (iCpu-- > 0)
558 {
559 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
560 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
561 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
562 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
563 }
564#endif
565
566 RTMemFree(pGVMM);
567}
568
569
570/**
571 * A quick hack for setting global config values.
572 *
573 * @returns VBox status code.
574 *
575 * @param pSession The session handle. Used for authentication.
576 * @param pszName The variable name.
577 * @param u64Value The new value.
578 */
579GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
580{
581 /*
582 * Validate input.
583 */
584 PGVMM pGVMM;
585 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
586 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
587 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
588
589 /*
590 * String switch time!
591 */
592 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
593 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
594 int rc = VINF_SUCCESS;
595 pszName += sizeof("/GVMM/") - 1;
596 if (!strcmp(pszName, "cEMTsMeansCompany"))
597 {
598 if (u64Value <= UINT32_MAX)
599 pGVMM->cEMTsMeansCompany = u64Value;
600 else
601 rc = VERR_OUT_OF_RANGE;
602 }
603 else if (!strcmp(pszName, "MinSleepAlone"))
604 {
605 if (u64Value <= RT_NS_100MS)
606 pGVMM->nsMinSleepAlone = u64Value;
607 else
608 rc = VERR_OUT_OF_RANGE;
609 }
610 else if (!strcmp(pszName, "MinSleepCompany"))
611 {
612 if (u64Value <= RT_NS_100MS)
613 pGVMM->nsMinSleepCompany = u64Value;
614 else
615 rc = VERR_OUT_OF_RANGE;
616 }
617#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
618 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
619 {
620 if (u64Value <= RT_NS_100MS)
621 pGVMM->nsMinSleepWithHrTimer = u64Value;
622 else
623 rc = VERR_OUT_OF_RANGE;
624 }
625#endif
626 else if (!strcmp(pszName, "EarlyWakeUp1"))
627 {
628 if (u64Value <= RT_NS_100MS)
629 {
630 pGVMM->nsEarlyWakeUp1 = u64Value;
631 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
632 }
633 else
634 rc = VERR_OUT_OF_RANGE;
635 }
636 else if (!strcmp(pszName, "EarlyWakeUp2"))
637 {
638 if (u64Value <= RT_NS_100MS)
639 {
640 pGVMM->nsEarlyWakeUp2 = u64Value;
641 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
642 }
643 else
644 rc = VERR_OUT_OF_RANGE;
645 }
646 else
647 rc = VERR_CFGM_VALUE_NOT_FOUND;
648 return rc;
649}
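/*
 * Editor's note: a hedged usage sketch for GVMMR0SetConfig.  Names must carry
 * the "/GVMM/" prefix and values must pass the range checks above.
 */
#if 0 /* illustration only */
    /* Lower the first early wake-up limit to 20 microseconds. */
    int rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 20000 /* ns */);
    /* Values above RT_NS_100MS yield VERR_OUT_OF_RANGE; unknown names
       yield VERR_CFGM_VALUE_NOT_FOUND. */
    AssertRC(rc);
#endif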
650
651
652/**
653 * A quick hack for getting global config values.
654 *
655 * @returns VBox status code.
656 *
657 * @param pSession The session handle. Used for authentication.
658 * @param pszName The variable name.
659 * @param pu64Value Where to return the value.
660 */
661GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
662{
663 /*
664 * Validate input.
665 */
666 PGVMM pGVMM;
667 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
668 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
669 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
670 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
671
672 /*
673 * String switch time!
674 */
675 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
676 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
677 int rc = VINF_SUCCESS;
678 pszName += sizeof("/GVMM/") - 1;
679 if (!strcmp(pszName, "cEMTsMeansCompany"))
680 *pu64Value = pGVMM->cEMTsMeansCompany;
681 else if (!strcmp(pszName, "MinSleepAlone"))
682 *pu64Value = pGVMM->nsMinSleepAlone;
683 else if (!strcmp(pszName, "MinSleepCompany"))
684 *pu64Value = pGVMM->nsMinSleepCompany;
685#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
686 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
687 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
688#endif
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
699/**
700 * Acquire the 'used' lock in shared mode.
701 *
702 * This prevents destruction of the VM while we're in ring-0.
703 *
704 * @returns IPRT status code, see RTSemFastMutexRequest.
705 * @param a_pGVMM The GVMM instance data.
706 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
707 */
708#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
709
710/**
711 * Release the 'used' lock when owning it in shared mode.
712 *
713 * @returns IPRT status code, see RTSemFastMutexRequest.
714 * @param a_pGVMM The GVMM instance data.
715 * @sa GVMMR0_USED_SHARED_LOCK
716 */
717#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
718
719/**
720 * Acquire the 'used' lock in exclusive mode.
721 *
722 * Only use this function when making changes to the used list.
723 *
724 * @returns IPRT status code, see RTSemFastMutexRequest.
725 * @param a_pGVMM The GVMM instance data.
726 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
727 */
728#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
729
730/**
731 * Release the 'used' lock when owning it in exclusive mode.
732 *
733 * @returns IPRT status code, see RTSemFastMutexRelease.
734 * @param a_pGVMM The GVMM instance data.
735 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
736 */
737#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
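/*
 * Editor's note: an illustrative sketch (hypothetical walker, not code from
 * this file) of the pattern the used-list lock macros above are meant for.
 */
#if 0 /* illustration only */
    int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
    AssertRC(rc);
    for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
    {
        /* Read-only inspection of pGVMM->aHandles[i] is safe here; per the
           UsedLock docs, no other locks may be acquired while holding it. */
    }
    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
#endif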
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!RT_VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PGVM pGVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
799 pReq->pVMR3 = pGVM->pVMR3;
800 }
801 return rc;
802}
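/*
 * Editor's note: a hedged sketch of the ring-3 side of this request.  Field
 * names are taken from the validation above; the request header magic and the
 * actual ioctl plumbing through the support driver are omitted as assumptions.
 */
#if 0 /* illustration only */
    GVMMCREATEVMREQ Req;
    Req.Hdr.cbReq = sizeof(Req);     /* checked against sizeof(*pReq) above */
    Req.pSession  = pSession;        /* must match the calling session */
    Req.cCpus     = 2;               /* 1..VMM_MAX_CPU_COUNT */
    Req.pVMR0     = NULL;            /* output */
    Req.pVMR3     = NIL_RTR3PTR;     /* output */
    /* ... submit via the VMMR0 request path and use Req.pVMR3 on success. */
#endif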
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and thereby the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppGVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
824 *ppGVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pGVM
861 && !pHandle->pvObj
862 && pHandle->iSelf == iHandle)
863 {
864 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
865 if (pHandle->pvObj)
866 {
867 /*
868 * Move the handle from the free to used list and perform permission checks.
869 */
870 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
871 AssertRC(rc);
872
873 pGVMM->iFreeHead = pHandle->iNext;
874 pHandle->iNext = pGVMM->iUsedHead;
875 pGVMM->iUsedHead = iHandle;
876 pGVMM->cVMs++;
877
878 pHandle->pGVM = NULL;
879 pHandle->pSession = pSession;
880 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
881 pHandle->ProcId = NIL_RTPROCESS;
882
883 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
884
885 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
886 if (RT_SUCCESS(rc))
887 {
888 /*
889 * Allocate memory for the VM structure (combined VM + GVM).
890 */
891 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
892 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
893 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
894 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
895 if (RT_SUCCESS(rc))
896 {
897 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
898 AssertPtr(pGVM);
899
900 /*
901 * Initialise the structure.
902 */
903 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
904 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
905 pGVM->gvmm.s.VMMemObj = hVMMemObj;
906 rc = GMMR0InitPerVMData(pGVM);
907 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
908 int rc3 = VMMR0InitPerVMData(pGVM);
909 CPUMR0InitPerVMData(pGVM);
910 DBGFR0InitPerVMData(pGVM);
911 PDMR0InitPerVMData(pGVM);
912 IOMR0InitPerVMData(pGVM);
913 TMR0InitPerVMData(pGVM);
914 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
915 {
916 /*
917 * Allocate page array.
918 * This currently has to be made available to ring-3, but this should change eventually.
919 */
920 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
921 if (RT_SUCCESS(rc))
922 {
923 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
924 for (uint32_t iPage = 0; iPage < cPages; iPage++)
925 {
926 paPages[iPage].uReserved = 0;
927 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
928 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
929 }
930
931 /*
932 * Map the page array, VM and VMCPU structures into ring-3.
933 */
934 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
935 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
936 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
937 0 /*offSub*/, sizeof(VM));
938 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
939 {
940 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
941 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
942 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
943 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
944 }
945 if (RT_SUCCESS(rc))
946 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
947 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
948 NIL_RTR0PROCESS);
949 if (RT_SUCCESS(rc))
950 {
951 /*
952 * Initialize all the VM pointers.
953 */
954 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
955 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
956
957 for (VMCPUID i = 0; i < cCpus; i++)
958 {
959 pGVM->aCpus[i].pVMR0 = pGVM;
960 pGVM->aCpus[i].pVMR3 = pVMR3;
961 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
962 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
963 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
964 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
965 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
966 }
967
968 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
969 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
970 ("%p\n", pGVM->paVMPagesR3));
971
972#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
973 /*
974 * Create the high resolution wake-up timer for EMT 0, ignore failures.
975 */
976 if (RTTimerCanDoHighResolution())
977 {
978 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
979 0 /*one-shot, no interval*/,
980 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
981 &pGVM->aCpus[0]);
982 if (RT_FAILURE(rc4))
983 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
984 }
985#endif
986
987 /*
988 * Complete the handle - take the UsedLock sem just to be careful.
989 */
990 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
991 AssertRC(rc);
992
993 pHandle->pGVM = pGVM;
994 pHandle->hEMT0 = hEMT0;
995 pHandle->ProcId = ProcId;
996 pGVM->pVMR3 = pVMR3;
997 pGVM->pVMR3Unsafe = pVMR3;
998 pGVM->aCpus[0].hEMT = hEMT0;
999 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1000 pGVM->aCpus[0].cEmtHashCollisions = 0;
1001 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1002 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1003 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1004 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1005 pGVMM->cEMTs += cCpus;
1006
1007 /* Associate it with the session and create the context hook for EMT0. */
1008 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1009 if (RT_SUCCESS(rc))
1010 {
1011 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1012 if (RT_SUCCESS(rc))
1013 {
1014 /*
1015 * Done!
1016 */
1017 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1018
1019 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1020 gvmmR0CreateDestroyUnlock(pGVMM);
1021
1022 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1023
1024 *ppGVM = pGVM;
1025 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1026 return VINF_SUCCESS;
1027 }
1028
1029 SUPR0SetSessionVM(pSession, NULL, NULL);
1030 }
1031 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1032 }
1033
1034 /* Cleanup mappings. */
1035 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1036 {
1037 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1038 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1039 }
1040 for (VMCPUID i = 0; i < cCpus; i++)
1041 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1042 {
1043 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1044 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1045 }
1046 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1047 {
1048 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1049 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1050 }
1051 }
1052 }
1053 else
1054 {
1055 if (RT_SUCCESS_NP(rc))
1056 rc = rc2;
1057 if (RT_SUCCESS_NP(rc))
1058 rc = rc3;
1059 }
1060 }
1061 }
1062 /* else: The user wasn't permitted to create this VM. */
1063
1064 /*
1065 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1066 * object reference here. A little extra mess because of the non-recursive lock.
1067 */
1068 void *pvObj = pHandle->pvObj;
1069 pHandle->pvObj = NULL;
1070 gvmmR0CreateDestroyUnlock(pGVMM);
1071
1072 SUPR0ObjRelease(pvObj, pSession);
1073
1074 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1075 return rc;
1076 }
1077
1078 rc = VERR_NO_MEMORY;
1079 }
1080 else
1081 rc = VERR_GVMM_IPE_1;
1082 }
1083 else
1084 rc = VERR_GVM_TOO_MANY_VMS;
1085
1086 gvmmR0CreateDestroyUnlock(pGVMM);
1087 return rc;
1088}
1089
1090
1091/**
1092 * Initializes the per VM data belonging to GVMM.
1093 *
1094 * @param pGVM Pointer to the global VM structure.
1095 * @param hSelf The handle.
1096 * @param cCpus The CPU count.
1097 * @param pSession The session this VM is associated with.
1098 */
1099static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1100{
1101 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1102 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1103 AssertCompileMemberAlignment(VM, cpum, 64);
1104 AssertCompileMemberAlignment(VM, tm, 64);
1105
1106 /* GVM: */
1107 pGVM->u32Magic = GVM_MAGIC;
1108 pGVM->hSelf = hSelf;
1109 pGVM->cCpus = cCpus;
1110 pGVM->pSession = pSession;
1111 pGVM->pSelf = pGVM;
1112
1113 /* VM: */
1114 pGVM->enmVMState = VMSTATE_CREATING;
1115 pGVM->hSelfUnsafe = hSelf;
1116 pGVM->pSessionUnsafe = pSession;
1117 pGVM->pVMR0ForCall = pGVM;
1118 pGVM->cCpusUnsafe = cCpus;
1119 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1120 pGVM->uStructVersion = 1;
1121 pGVM->cbSelf = sizeof(VM);
1122 pGVM->cbVCpu = sizeof(VMCPU);
1123
1124 /* GVMM: */
1125 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1126 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1127 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1128 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1129 pGVM->gvmm.s.fDoneVMMR0Init = false;
1130 pGVM->gvmm.s.fDoneVMMR0Term = false;
1131
1132 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1133 {
1134 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1135 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1136 }
1137 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1138
1139 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1140 {
1141 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1142 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1143 }
1144
1145 /*
1146 * Per virtual CPU.
1147 */
1148 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1149 {
1150 pGVM->aCpus[i].idCpu = i;
1151 pGVM->aCpus[i].idCpuUnsafe = i;
1152 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1153 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1154 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1155 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1156 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1157 pGVM->aCpus[i].pGVM = pGVM;
1158 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1159 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1160 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1161 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1162 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1163 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1164 }
1165}
1166
1167
1168/**
1169 * Does the VM initialization.
1170 *
1171 * @returns VBox status code.
1172 * @param pGVM The global (ring-0) VM structure.
1173 */
1174GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1175{
1176 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1177
1178 int rc = VERR_INTERNAL_ERROR_3;
1179 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1180 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1181 {
1182 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1183 {
1184 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1185 if (RT_FAILURE(rc))
1186 {
1187 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1188 break;
1189 }
1190 }
1191 }
1192 else
1193 rc = VERR_WRONG_ORDER;
1194
1195 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1196 return rc;
1197}
1198
1199
1200/**
1201 * Indicates that we're done with the ring-0 initialization
1202 * of the VM.
1203 *
1204 * @param pGVM The global (ring-0) VM structure.
1205 * @thread EMT(0)
1206 */
1207GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1208{
1209 /* Set the indicator. */
1210 pGVM->gvmm.s.fDoneVMMR0Init = true;
1211}
1212
1213
1214/**
1215 * Indicates that we're doing the ring-0 termination of the VM.
1216 *
1217 * @returns true if termination hasn't been done already, false if it has.
1218 * @param pGVM Pointer to the global VM structure.
1219 * @thread EMT(0) or session cleanup thread.
1220 */
1221GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1222{
1223 /* Validate the VM structure, state and handle. */
1224 AssertPtrReturn(pGVM, false);
1225
1226 /* Set the indicator. */
1227 if (pGVM->gvmm.s.fDoneVMMR0Term)
1228 return false;
1229 pGVM->gvmm.s.fDoneVMMR0Term = true;
1230 return true;
1231}
1232
1233
1234/**
1235 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1236 *
1237 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1238 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1239 * would've been nice if the caller was actually the EMT thread or that we somehow
1240 * could've associated the calling thread with the VM up front.
1241 *
1242 * @returns VBox status code.
1243 * @param pGVM The global (ring-0) VM structure.
1244 *
1245 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1246 */
1247GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1248{
1249 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1250 PGVMM pGVMM;
1251 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1252
1253 /*
1254 * Validate the VM structure, state and caller.
1255 */
1256 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1257 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1258 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1259 VERR_WRONG_ORDER);
1260
1261 uint32_t hGVM = pGVM->hSelf;
1262 ASMCompilerBarrier();
1263 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1264 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1265
1266 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1267 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1268
1269 RTPROCESS ProcId = RTProcSelf();
1270 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1271 AssertReturn( ( pHandle->hEMT0 == hSelf
1272 && pHandle->ProcId == ProcId)
1273 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1274
1275 /*
1276 * Lookup the handle and destroy the object.
1277 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1278 * object, we take some precautions against racing callers just in case...
1279 */
1280 int rc = gvmmR0CreateDestroyLock(pGVMM);
1281 AssertRC(rc);
1282
1283 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1284 if ( pHandle->pGVM == pGVM
1285 && ( ( pHandle->hEMT0 == hSelf
1286 && pHandle->ProcId == ProcId)
1287 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1288 && RT_VALID_PTR(pHandle->pvObj)
1289 && RT_VALID_PTR(pHandle->pSession)
1290 && RT_VALID_PTR(pHandle->pGVM)
1291 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1292 {
1293 /* Check that other EMTs have deregistered. */
1294 uint32_t cNotDeregistered = 0;
1295 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1296 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1297 if (cNotDeregistered == 0)
1298 {
1299 /* Grab the object pointer. */
1300 void *pvObj = pHandle->pvObj;
1301 pHandle->pvObj = NULL;
1302 gvmmR0CreateDestroyUnlock(pGVMM);
1303
1304 SUPR0ObjRelease(pvObj, pHandle->pSession);
1305 }
1306 else
1307 {
1308 gvmmR0CreateDestroyUnlock(pGVMM);
1309 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1310 }
1311 }
1312 else
1313 {
1314 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1315 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1316 gvmmR0CreateDestroyUnlock(pGVMM);
1317 rc = VERR_GVMM_IPE_2;
1318 }
1319
1320 return rc;
1321}
1322
1323
1324/**
1325 * Performs VM cleanup task as part of object destruction.
1326 *
1327 * @param pGVM The GVM pointer.
1328 */
1329static void gvmmR0CleanupVM(PGVM pGVM)
1330{
1331 if ( pGVM->gvmm.s.fDoneVMMR0Init
1332 && !pGVM->gvmm.s.fDoneVMMR0Term)
1333 {
1334 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1335 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1336 {
1337 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1338 VMMR0TermVM(pGVM, NIL_VMCPUID);
1339 }
1340 else
1341 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1342 }
1343
1344 GMMR0CleanupVM(pGVM);
1345#ifdef VBOX_WITH_NEM_R0
1346 NEMR0CleanupVM(pGVM);
1347#endif
1348 PDMR0CleanupVM(pGVM);
1349 IOMR0CleanupVM(pGVM);
1350 DBGFR0CleanupVM(pGVM);
1351 PGMR0CleanupVM(pGVM);
1352 TMR0CleanupVM(pGVM);
1353 VMMR0CleanupVM(pGVM);
1354}
1355
1356
1357/**
1358 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1359 *
1360 * pvUser1 is the GVM instance pointer.
1361 * pvUser2 is the handle pointer.
1362 */
1363static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1364{
1365 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1366
1367 NOREF(pvObj);
1368
1369 /*
1370 * Some quick, paranoid, input validation.
1371 */
1372 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1373 AssertPtr(pHandle);
1374 PGVMM pGVMM = (PGVMM)pvUser1;
1375 Assert(pGVMM == g_pGVMM);
1376 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1377 if ( !iHandle
1378 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1379 || iHandle != pHandle->iSelf)
1380 {
1381 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1382 return;
1383 }
1384
1385 int rc = gvmmR0CreateDestroyLock(pGVMM);
1386 AssertRC(rc);
1387 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1388 AssertRC(rc);
1389
1390 /*
1391 * This is a tad slow but a doubly linked list is too much hassle.
1392 */
1393 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1394 {
1395 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1396 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1397 gvmmR0CreateDestroyUnlock(pGVMM);
1398 return;
1399 }
1400
1401 if (pGVMM->iUsedHead == iHandle)
1402 pGVMM->iUsedHead = pHandle->iNext;
1403 else
1404 {
1405 uint16_t iPrev = pGVMM->iUsedHead;
1406 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1407 while (iPrev)
1408 {
1409 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1410 {
1411 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1412 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1413 gvmmR0CreateDestroyUnlock(pGVMM);
1414 return;
1415 }
1416 if (RT_UNLIKELY(c-- <= 0))
1417 {
1418 iPrev = 0;
1419 break;
1420 }
1421
1422 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1423 break;
1424 iPrev = pGVMM->aHandles[iPrev].iNext;
1425 }
1426 if (!iPrev)
1427 {
1428 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1429 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1430 gvmmR0CreateDestroyUnlock(pGVMM);
1431 return;
1432 }
1433
1434 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1435 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1436 }
1437 pHandle->iNext = 0;
1438 pGVMM->cVMs--;
1439
1440 /*
1441 * Do the global cleanup round.
1442 */
1443 PGVM pGVM = pHandle->pGVM;
1444 if ( RT_VALID_PTR(pGVM)
1445 && pGVM->u32Magic == GVM_MAGIC)
1446 {
1447 pGVMM->cEMTs -= pGVM->cCpus;
1448
1449 if (pGVM->pSession)
1450 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1451
1452 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1453
1454 gvmmR0CleanupVM(pGVM);
1455
1456 /*
1457 * Do the GVMM cleanup - must be done last.
1458 */
1459 /* The VM and VM pages mappings/allocations. */
1460 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1461 {
1462 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1463 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1464 }
1465
1466 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1467 {
1468 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1469 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1470 }
1471
1472 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1473 {
1474 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1475 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1476 }
1477
1478 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1479 {
1480 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1481 {
1482 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1483 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1484 }
1485 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1486 {
1487 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1488 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1489 }
1490#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1491 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1492 {
1493 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1494 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1495 }
1496#endif
1497 }
1498
1499 /* the GVM structure itself. */
1500 pGVM->u32Magic |= UINT32_C(0x80000000);
1501 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1502 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1503 pGVM = NULL;
1504
1505 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1506 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1507 AssertRC(rc);
1508 }
1509 /* else: GVMMR0CreateVM cleanup. */
1510
1511 /*
1512 * Free the handle.
1513 */
1514 pHandle->iNext = pGVMM->iFreeHead;
1515 pGVMM->iFreeHead = iHandle;
1516 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1517 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1518 ASMAtomicWriteNullPtr(&pHandle->pSession);
1519 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1520 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1521
1522 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1523 gvmmR0CreateDestroyUnlock(pGVMM);
1524 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1525}
1526
1527
1528/**
1529 * Registers the calling thread as the EMT of a Virtual CPU.
1530 *
1531 * Note that VCPU 0 is automatically registered during VM creation.
1532 *
1533 * @returns VBox status code
1534 * @param pGVM The global (ring-0) VM structure.
1535 * @param idCpu VCPU id to register the current thread as.
1536 */
1537GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1538{
1539 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1540
1541 /*
1542 * Validate the VM structure, state and handle.
1543 */
1544 PGVMM pGVMM;
1545 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1546 if (RT_SUCCESS(rc))
1547 {
1548 if (idCpu < pGVM->cCpus)
1549 {
1550 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1551 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1552
1553 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1554
1555 /* Check that the EMT isn't already assigned to a thread. */
1556 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1557 {
1558 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1559
1560 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1561 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1562 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1563 if (RT_SUCCESS(rc))
1564 {
1565 /*
1566 * Do the assignment, then try setup the hook. Undo if that fails.
1567 */
1568 unsigned cCollisions = 0;
1569 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1570 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1571 {
1572 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1573 do
1574 {
1575 cCollisions++;
1576 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1577 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1578 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1579 }
1580 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1581 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1582
1583 pGVCpu->hNativeThreadR0 = hNativeSelf;
1584 pGVCpu->hEMT = hNativeSelf;
1585 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1586 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1587
1588 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1589 if (RT_SUCCESS(rc))
1590 {
1591 CPUMR0RegisterVCpuThread(pGVCpu);
1592
1593#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1594 /*
1595 * Create the high resolution wake-up timer, ignore failures.
1596 */
1597 if (RTTimerCanDoHighResolution())
1598 {
1599 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1600 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1601 if (RT_FAILURE(rc2))
1602 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1603 }
1604#endif
1605 }
1606 else
1607 {
1608 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1609 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1610 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1611 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1612 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1613 }
1614 }
1615 }
1616 else
1617 rc = VERR_ACCESS_DENIED;
1618
1619 gvmmR0CreateDestroyUnlock(pGVMM);
1620 }
1621 else
1622 rc = VERR_INVALID_CPU_ID;
1623 }
1624 return rc;
1625}
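/*
 * Editor's note: an illustrative sketch of the lookup side of the EMT hash
 * populated above (double hashing / open addressing).  This is a hypothetical
 * helper; the real lookup lives elsewhere in this file.
 */
#if 0 /* illustration only */
static VMCPUID gvmmR0SketchEmtHashLookup(PGVM pGVM, RTNATIVETHREAD hNativeSelf)
{
    uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
    if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf)
    {
        uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
        do
        {
            idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
                return NIL_VMCPUID; /* not registered */
        } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf);
    }
    return pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
}
#endif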
1626
1627
1628/**
1629 * Deregisters the calling thread as the EMT of a Virtual CPU.
1630 *
1631 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1632 *
1633 * @returns VBox status code
1634 * @param pGVM The global (ring-0) VM structure.
1635 * @param idCpu VCPU id to deregister the current thread from.
1636 */
1637GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1638{
1639 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1640
1641 /*
1642 * Validate the VM structure, state and handle.
1643 */
1644 PGVMM pGVMM;
1645 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1646 if (RT_SUCCESS(rc))
1647 {
1648 /*
1649 * Take the destruction lock and recheck the handle state to
1650 * prevent racing GVMMR0DestroyVM.
1651 */
1652 gvmmR0CreateDestroyLock(pGVMM);
1653
1654 uint32_t hSelf = pGVM->hSelf;
1655 ASMCompilerBarrier();
1656 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1657 && pGVMM->aHandles[hSelf].pvObj != NULL
1658 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1659 {
1660 /*
1661 * Do per-EMT cleanups.
1662 */
1663 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1664
1665 /*
1666 * Invalidate hEMT. We don't use NIL here as that would allow
1667 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1668 */
1669 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1670 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1671
1672 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1673 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1674 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1675 }
1676
1677 gvmmR0CreateDestroyUnlock(pGVMM);
1678 }
1679 return rc;
1680}
1681
1682
1683/**
1684 * Registers the caller as a given worker thread.
1685 *
1686 * This enables the thread to operate critical sections in ring-0.
1687 *
1688 * @returns VBox status code.
1689 * @param pGVM The global (ring-0) VM structure.
1690 * @param enmWorker The worker thread this is supposed to be.
1691 * @param hNativeSelfR3 The ring-3 native self of the caller.
1692 */
1693GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1694{
1695 /*
1696 * Validate input.
1697 */
1698 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1699 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1700 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1701 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1702 PGVMM pGVMM;
1703 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1704 AssertRCReturn(rc, rc);
1705 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1706
1707 /*
1708 * Grab the big lock and check the VM state again.
1709 */
1710 uint32_t const hSelf = pGVM->hSelf;
1711 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1712 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1713 && pGVMM->aHandles[hSelf].pvObj != NULL
1714 && pGVMM->aHandles[hSelf].pGVM == pGVM
1715 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1716 {
1717 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1718 {
1719 /*
1720 * Check that the thread isn't an EMT or serving in some other worker capacity.
1721 */
1722 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1723 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1724 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1725 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1726 rc = VERR_INVALID_PARAMETER);
1727 if (RT_SUCCESS(rc))
1728 {
1729 /*
1730 * Do the registration.
1731 */
1732 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1733 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1734 {
1735 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1736 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1737 rc = VINF_SUCCESS;
1738 }
1739 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1740 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1741 rc = VERR_ALREADY_EXISTS;
1742 else
1743 rc = VERR_RESOURCE_BUSY;
1744 }
1745 }
1746 else
1747 rc = VERR_VM_INVALID_VM_STATE;
1748 }
1749 else
1750 rc = VERR_INVALID_VM_HANDLE;
1751 gvmmR0CreateDestroyUnlock(pGVMM);
1752 return rc;
1753}
1754
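/*
 * Illustrative sketch, not part of the original file: a ring-0 worker thread
 * registers itself once before using ring-0 critical sections and deregisters
 * for good when it is done.  GVMMWORKERTHREAD_EXAMPLE is a made-up enum member
 * and hNativeR3 is assumed to be the worker's ring-3 native handle, passed
 * down from ring-3:
 */
#if 0 /* illustrative only */
static int sketchWorkerThreadBody(PGVM pGVM, RTNATIVETHREAD hNativeR3)
{
    int rc = GVMMR0RegisterWorkerThread(pGVM, GVMMWORKERTHREAD_EXAMPLE, hNativeR3);
    if (RT_SUCCESS(rc))
    {
        /* ... work that may enter ring-0 critical sections goes here ... */
        rc = GVMMR0DeregisterWorkerThread(pGVM, GVMMWORKERTHREAD_EXAMPLE);
        /* The slot is now tombstoned; a later registration attempt for it
           fails with VERR_RESOURCE_BUSY. */
    }
    return rc;
}
#endif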
1755
1756/**
1757 * Deregisters a worker thread (caller).
1758 *
1759 * The worker thread cannot be re-created and re-registered; instead, the given
1760 * @a enmWorker slot becomes invalid.
1761 *
1762 * @returns VBox status code.
1763 * @param pGVM The global (ring-0) VM structure.
1764 * @param enmWorker The worker thread this is supposed to be.
1765 */
1766GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1767{
1768 /*
1769 * Validate input.
1770 */
1771 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1772 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1773 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1774 PGVMM pGVMM;
1775 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1776 AssertRCReturn(rc, rc);
1777
1778 /*
1779 * Grab the big lock and check the VM state again.
1780 */
1781 uint32_t const hSelf = pGVM->hSelf;
1782 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1783 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1784 && pGVMM->aHandles[hSelf].pvObj != NULL
1785 && pGVMM->aHandles[hSelf].pGVM == pGVM
1786 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1787 {
1788 /*
1789 * Do the deregistration.
1790 * This will prevent any other thread from registering as the worker later.
1791 */
1792 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1793 {
1794 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1795 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1796 rc = VINF_SUCCESS;
1797 }
1798 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1799 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1800 rc = VINF_SUCCESS;
1801 else
1802 rc = VERR_NOT_OWNER;
1803 }
1804 else
1805 rc = VERR_INVALID_VM_HANDLE;
1806 gvmmR0CreateDestroyUnlock(pGVMM);
1807 return rc;
1808}
1809
1810
1811/**
1812 * Lookup a GVM structure by its handle.
1813 *
1814 * @returns The GVM pointer on success, NULL on failure.
1815 * @param hGVM The global VM handle. Asserts on bad handle.
1816 */
1817GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1818{
1819 PGVMM pGVMM;
1820 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1821
1822 /*
1823 * Validate.
1824 */
1825 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1826 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1827
1828 /*
1829 * Look it up.
1830 */
1831 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1832 AssertPtrReturn(pHandle->pvObj, NULL);
1833 PGVM pGVM = pHandle->pGVM;
1834 AssertPtrReturn(pGVM, NULL);
1835
1836 return pGVM;
1837}
1838
1839
1840/**
1841 * Check that the given GVM and VM structures match up.
1842 *
1843 * The calling thread must be in the same process as the VM. All current lookups
1844 * are by threads inside the same process, so this will not be an issue.
1845 *
1846 * @returns VBox status code.
1847 * @param pGVM The global (ring-0) VM structure.
1848 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1849 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1850 * shared mode when requested.
1851 *
1852 * Be very careful if not taking the lock as it's
1853 * possible that the VM will disappear then!
1854 *
1855 * @remark This will not assert on an invalid pGVM but tries to return silently.
1856 */
1857static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1858{
1859 /*
1860 * Check the pointers.
1861 */
1862 int rc;
1863 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1864 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1865 {
1866 /*
1867 * Get the pGVMM instance and check the VM handle.
1868 */
1869 PGVMM pGVMM;
1870 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1871
1872 uint16_t hGVM = pGVM->hSelf;
1873 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1874 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1875 {
1876 RTPROCESS const pidSelf = RTProcSelf();
1877 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1878 if (fTakeUsedLock)
1879 {
1880 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1881 AssertRCReturn(rc, rc);
1882 }
1883
1884 if (RT_LIKELY( pHandle->pGVM == pGVM
1885 && pHandle->ProcId == pidSelf
1886 && RT_VALID_PTR(pHandle->pvObj)))
1887 {
1888 /*
1889 * Some more VM data consistency checks.
1890 */
1891 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1892 && pGVM->hSelfUnsafe == hGVM
1893 && pGVM->pSelf == pGVM))
1894 {
1895 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1896 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1897 {
1898 *ppGVMM = pGVMM;
1899 return VINF_SUCCESS;
1900 }
1901 rc = VERR_INCONSISTENT_VM_HANDLE;
1902 }
1903 else
1904 rc = VERR_INCONSISTENT_VM_HANDLE;
1905 }
1906 else
1907 rc = VERR_INVALID_VM_HANDLE;
1908
1909 if (fTakeUsedLock)
1910 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1911 }
1912 else
1913 rc = VERR_INVALID_VM_HANDLE;
1914 }
1915 else
1916 rc = VERR_INVALID_POINTER;
1917 return rc;
1918}
1919
1920
1921/**
1922 * Validates a GVM/VM pair.
1923 *
1924 * @returns VBox status code.
1925 * @param pGVM The global (ring-0) VM structure.
1926 */
1927GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1928{
1929 PGVMM pGVMM;
1930 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1931}
1932
1933
1934/**
1935 * Check that the given GVM and VM structures match up.
1936 *
1937 * The calling thread must be in the same process as the VM. All current lookups
1938 * are by threads inside the same process, so this will not be an issue.
1939 *
1940 * @returns VBox status code.
1941 * @param pGVM The global (ring-0) VM structure.
1942 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1943 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1944 * @thread EMT
1945 *
1946 * @remarks This will assert in all failure paths.
1947 */
1948static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1949{
1950 /*
1951 * Check the pointers.
1952 */
1953 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1954 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1955
1956 /*
1957 * Get the pGVMM instance and check the VM handle.
1958 */
1959 PGVMM pGVMM;
1960 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1961
1962 uint16_t hGVM = pGVM->hSelf;
1963 ASMCompilerBarrier();
1964 AssertReturn( hGVM != NIL_GVM_HANDLE
1965 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1966
1967 RTPROCESS const pidSelf = RTProcSelf();
1968 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1969 AssertReturn( pHandle->pGVM == pGVM
1970 && pHandle->ProcId == pidSelf
1971 && RT_VALID_PTR(pHandle->pvObj),
1972 VERR_INVALID_HANDLE);
1973
1974 /*
1975 * Check the EMT claim.
1976 */
1977 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1978 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1979 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1980
1981 /*
1982 * Some more VM data consistency checks.
1983 */
1984 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1985 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1986 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1987 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1988
1989 *ppGVMM = pGVMM;
1990 return VINF_SUCCESS;
1991}
1992
1993
1994/**
1995 * Validates a GVM/EMT pair.
1996 *
1997 * @returns VBox status code.
1998 * @param pGVM The global (ring-0) VM structure.
1999 * @param idCpu The Virtual CPU ID of the calling EMT.
2000 * @thread EMT(idCpu)
2001 */
2002GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2003{
2004 PGVMM pGVMM;
2005 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2006}
2007
2008
2009/**
2010 * Looks up the VM belonging to the specified EMT thread.
2011 *
2012 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2013 * unnecessary kernel panics when the EMT thread hits an assertion. The
2014 * caller may or may not be an EMT thread.
2015 *
2016 * @returns Pointer to the VM on success, NULL on failure.
2017 * @param hEMT The native thread handle of the EMT.
2018 * NIL_RTNATIVETHREAD means the current thread
2019 */
2020GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2021{
2022 /*
2023 * No assertions here as we're usually called in an AssertMsgN or
2024 * RTAssert* context.
2025 */
2026 PGVMM pGVMM = g_pGVMM;
2027 if ( !RT_VALID_PTR(pGVMM)
2028 || pGVMM->u32Magic != GVMM_MAGIC)
2029 return NULL;
2030
2031 if (hEMT == NIL_RTNATIVETHREAD)
2032 hEMT = RTThreadNativeSelf();
2033 RTPROCESS ProcId = RTProcSelf();
2034
2035 /*
2036 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2037 */
2038/** @todo introduce some pid hash table here, please. */
2039 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2040 {
2041 if ( pGVMM->aHandles[i].iSelf == i
2042 && pGVMM->aHandles[i].ProcId == ProcId
2043 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2044 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2045 {
2046 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2047 return pGVMM->aHandles[i].pGVM;
2048
2049 /* This is fairly safe with the current process per VM approach. */
2050 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2051 VMCPUID const cCpus = pGVM->cCpus;
2052 ASMCompilerBarrier();
2053 if ( cCpus < 1
2054 || cCpus > VMM_MAX_CPU_COUNT)
2055 continue;
2056 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2057 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2058 return pGVMM->aHandles[i].pGVM;
2059 }
2060 }
2061 return NULL;
2062}
2063
2064
2065/**
2066 * Looks up the GVMCPU belonging to the specified EMT thread.
2067 *
2068 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2069 * unnecessary kernel panics when the EMT thread hits an assertion. The
2070 * caller may or may not be an EMT thread.
2071 *
2072 * @returns Pointer to the GVMCPU structure on success, NULL on failure.
2073 * @param hEMT The native thread handle of the EMT.
2074 * NIL_RTNATIVETHREAD means the current thread
2075 */
2076GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2077{
2078 /*
2079 * No assertions here as we're usually called in an AssertMsgN,
2080 * RTAssert*, Log and LogRel contexts.
2081 */
2082 PGVMM pGVMM = g_pGVMM;
2083 if ( !RT_VALID_PTR(pGVMM)
2084 || pGVMM->u32Magic != GVMM_MAGIC)
2085 return NULL;
2086
2087 if (hEMT == NIL_RTNATIVETHREAD)
2088 hEMT = RTThreadNativeSelf();
2089 RTPROCESS ProcId = RTProcSelf();
2090
2091 /*
2092 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2093 */
2094/** @todo introduce some pid hash table here, please. */
2095 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2096 {
2097 if ( pGVMM->aHandles[i].iSelf == i
2098 && pGVMM->aHandles[i].ProcId == ProcId
2099 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2100 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2101 {
2102 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2103 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2104 return &pGVM->aCpus[0];
2105
2106 /* This is fairly safe with the current process per VM approach. */
2107 VMCPUID const cCpus = pGVM->cCpus;
2108 ASMCompilerBarrier();
2110 if ( cCpus < 1
2111 || cCpus > VMM_MAX_CPU_COUNT)
2112 continue;
2113 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2114 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2115 return &pGVM->aCpus[idCpu];
2116 }
2117 }
2118 return NULL;
2119}
2120
2121
2122/**
2123 * Get the GVMCPU structure for the given EMT.
2124 *
2125 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2126 * @param pGVM The global (ring-0) VM structure.
2127 * @param hEMT The native thread handle of the EMT.
2128 * NIL_RTNATIVETHREAD means the current thread
2129 */
2130GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2131{
2132 /*
2133 * Validate & adjust input.
2134 */
2135 AssertPtr(pGVM);
2136 Assert(pGVM->u32Magic == GVM_MAGIC);
2137 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2138 {
2139 hEMT = RTThreadNativeSelf();
2140 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2141 }
2142
2143 /*
2144 * Find the matching hash table entry.
2145 * See similar code in GVMMR0GetRing3ThreadForSelf.
2146 */
2147 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2148 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2149 { /* likely */ }
2150 else
2151 {
2152#ifdef VBOX_STRICT
2153 unsigned cCollisions = 0;
2154#endif
2155 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2156 for (;;)
2157 {
2158 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2159 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2160 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2161 break;
2162 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2163 {
2164#ifdef VBOX_STRICT
2165 uint32_t idxCpu = pGVM->cCpus;
2166 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2167 while (idxCpu-- > 0)
2168 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2169#endif
2170 return NULL;
2171 }
2172 }
2173 }
2174
2175 /*
2176 * Validate the VCpu number and translate it into a pointer.
2177 */
2178 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2179 AssertReturn(idCpu < pGVM->cCpus, NULL);
2180 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2181 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2182 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2183 return pGVCpu;
2184}
2185
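/*
 * Illustrative sketch, not part of the original file: the lookup above is
 * classic open addressing with double hashing -- GVMM_EMT_HASH_1 picks the
 * start slot and GVMM_EMT_HASH_2 the probe stride.  A toy standalone version
 * over a small table (the sketch* hash functions below are made up; the real
 * macros live in the GVMM internal header):
 */
#if 0 /* illustrative only */
# define SKETCH_HASH_SIZE 32 /* power of two => cheap modulo, and an odd stride visits every slot */

static uint32_t sketchHash1(uintptr_t uKey) { return (uint32_t)(uKey >> 4) % SKETCH_HASH_SIZE; }
static uint32_t sketchHash2(uintptr_t uKey) { return ((uint32_t)(uKey >> 9) | 1) % SKETCH_HASH_SIZE; }

/* Returns the slot index of uKey, or -1 if not present (0 marks an empty slot). */
static int32_t sketchLookup(uintptr_t const *pauTable, uintptr_t uKey)
{
    uint32_t idx = sketchHash1(uKey);
    if (pauTable[idx] == uKey)
        return (int32_t)idx;                        /* fast path: no collision */
    uint32_t const idxStep = sketchHash2(uKey);
    for (unsigned cProbes = 0; cProbes < SKETCH_HASH_SIZE; cProbes++)
    {
        idx = (idx + idxStep) % SKETCH_HASH_SIZE;
        if (pauTable[idx] == uKey)
            return (int32_t)idx;                    /* found after collision(s) */
        if (pauTable[idx] == 0)
            return -1;                              /* empty slot terminates the probe chain */
    }
    return -1;
}
#endif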
2186
2187/**
2188 * Get the native ring-3 thread handle for the caller.
2189 *
2190 * This works for EMTs and registered workers.
2191 *
2192 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2193 * @param pGVM The global (ring-0) VM structure.
2194 */
2195GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2196{
2197 /*
2198 * Validate input.
2199 */
2200 AssertPtr(pGVM);
2201 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2202 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2203 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2204
2205 /*
2206 * Find the matching hash table entry.
2207 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2208 */
2209 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2210 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2211 { /* likely */ }
2212 else
2213 {
2214#ifdef VBOX_STRICT
2215 unsigned cCollisions = 0;
2216#endif
2217 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2218 for (;;)
2219 {
2220 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2221 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2222 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2223 break;
2224 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2225 {
2226#ifdef VBOX_STRICT
2227 uint32_t idxCpu = pGVM->cCpus;
2228 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2229 while (idxCpu-- > 0)
2230 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2231#endif
2232
2233 /*
2234 * Not an EMT, so see if it's a worker thread.
2235 */
2236 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2237 while (--idx > GVMMWORKERTHREAD_INVALID)
2238 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2239 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2240
2241 return NIL_RTNATIVETHREAD;
2242 }
2243 }
2244 }
2245
2246 /*
2247 * Validate the VCpu number and translate it into a pointer.
2248 */
2249 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2250 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2251 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2252 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2253 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2254 return pGVCpu->hNativeThread;
2255}
2256
2257
2258/**
2259 * Converts a pointer with the GVM structure to a host physical address.
2260 *
2261 * @returns Host physical address.
2262 * @param pGVM The global (ring-0) VM structure.
2263 * @param pv The address to convert.
2264 * @thread EMT
2265 */
2266GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2267{
2268 AssertPtr(pGVM);
2269 Assert(pGVM->u32Magic == GVM_MAGIC);
2270 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2271 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2272 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2273}
2274
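/*
 * Illustrative sketch, not part of the original file: the conversion above
 * splits the pointer into a page index within the backing ring-0 memory
 * object plus a byte offset within that page.  Spelled out step by step,
 * with the example numbers in the comments assuming 4 KiB host pages
 * (HOST_PAGE_SHIFT == 12):
 */
#if 0 /* illustrative only */
static RTHCPHYS sketchPtrToPhys(RTR0MEMOBJ hMemObj, void *pvBase, void *pv)
{
    uintptr_t const offObj  = (uintptr_t)pv - (uintptr_t)pvBase;      /* byte offset into the object  */
    size_t    const iPage   = offObj >> HOST_PAGE_SHIFT;              /* e.g. offObj 0x5123 -> page 5 */
    uintptr_t const offPage = (uintptr_t)pv & HOST_PAGE_OFFSET_MASK;  /* e.g. 0x123 within that page  */
    return RTR0MemObjGetPagePhysAddr(hMemObj, iPage) | offPage;       /* page base | in-page offset   */
}
#endif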
2275
2276/**
2277 * This will wake up expired and soon-to-be expired VMs.
2278 *
2279 * @returns The number of EMTs that have been woken up.
2280 * @param pGVMM Pointer to the GVMM instance data.
2281 * @param u64Now The current time.
2282 */
2283static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2284{
2285 /*
2286 * Skip this if early wake-ups have been disabled, either because of high
2287 * resolution wake-ups or by the user.
2288 */
2289 if (!pGVMM->fDoEarlyWakeUps)
2290 return 0;
2291
2292/** @todo Rewrite this algorithm. See performance defect XYZ. */
2293
2294 /*
2295 * A cheap optimization to stop wasting so much time here on big setups.
2296 */
2297 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2298 if ( pGVMM->cHaltedEMTs == 0
2299 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2300 return 0;
2301
2302 /*
2303 * Only one thread doing this at a time.
2304 */
2305 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2306 return 0;
2307
2308 /*
2309 * The first pass will wake up VMs which have actually expired
2310 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2311 */
2312 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2313 uint64_t u64Min = UINT64_MAX;
2314 unsigned cWoken = 0;
2315 unsigned cHalted = 0;
2316 unsigned cTodo2nd = 0;
2317 unsigned cTodo3rd = 0;
2318 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2319 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2320 i = pGVMM->aHandles[i].iNext)
2321 {
2322 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2323 if ( RT_VALID_PTR(pCurGVM)
2324 && pCurGVM->u32Magic == GVM_MAGIC)
2325 {
2326 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2327 {
2328 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2329 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2330 if (u64)
2331 {
2332 if (u64 <= u64Now)
2333 {
2334 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2335 {
2336 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2337 AssertRC(rc);
2338 cWoken++;
2339 }
2340 }
2341 else
2342 {
2343 cHalted++;
2344 if (u64 <= uNsEarlyWakeUp1)
2345 cTodo2nd++;
2346 else if (u64 <= uNsEarlyWakeUp2)
2347 cTodo3rd++;
2348 else if (u64 < u64Min)
2349 u64Min = u64; /* track the earliest remaining expiry */
2350 }
2351 }
2352 }
2353 }
2354 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2355 }
2356
2357 if (cTodo2nd)
2358 {
2359 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2360 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2361 i = pGVMM->aHandles[i].iNext)
2362 {
2363 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2364 if ( RT_VALID_PTR(pCurGVM)
2365 && pCurGVM->u32Magic == GVM_MAGIC)
2366 {
2367 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2368 {
2369 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2370 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2371 if ( u64
2372 && u64 <= uNsEarlyWakeUp1)
2373 {
2374 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2375 {
2376 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2377 AssertRC(rc);
2378 cWoken++;
2379 }
2380 }
2381 }
2382 }
2383 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2384 }
2385 }
2386
2387 if (cTodo3rd)
2388 {
2389 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2390 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2391 i = pGVMM->aHandles[i].iNext)
2392 {
2393 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2394 if ( RT_VALID_PTR(pCurGVM)
2395 && pCurGVM->u32Magic == GVM_MAGIC)
2396 {
2397 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2398 {
2399 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2400 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2401 if ( u64
2402 && u64 <= uNsEarlyWakeUp2)
2403 {
2404 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2405 {
2406 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2407 AssertRC(rc);
2408 cWoken++;
2409 }
2410 }
2411 }
2412 }
2413 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2414 }
2415 }
2416
2417 /*
2418 * Set the minimum value.
2419 */
2420 pGVMM->uNsNextEmtWakeup = u64Min;
2421
2422 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2423 return cWoken;
2424}
2425
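/*
 * Illustrative sketch, not part of the original file: the loops above bucket
 * each halted EMT by its expiry time -- already expired (woken in the 1st
 * pass), within nsEarlyWakeUp1 of now (2nd pass) or within nsEarlyWakeUp2
 * (3rd pass) -- so the later passes only run when they have work to do.
 * The classification in isolation:
 */
#if 0 /* illustrative only */
typedef enum { SKETCH_WAKE_NOW, SKETCH_WAKE_2ND, SKETCH_WAKE_3RD, SKETCH_KEEP_SLEEPING } SKETCHWAKECLASS;

static SKETCHWAKECLASS sketchClassify(uint64_t u64Expire, uint64_t u64Now,
                                      uint64_t nsEarly1, uint64_t nsEarly2)
{
    if (u64Expire <= u64Now)
        return SKETCH_WAKE_NOW;        /* already expired: signalled in the 1st pass */
    if (u64Expire <= u64Now + nsEarly1)
        return SKETCH_WAKE_2ND;        /* expires very soon: woken early in the 2nd pass */
    if (u64Expire <= u64Now + nsEarly2)
        return SKETCH_WAKE_3RD;        /* expires soon-ish: woken early in the 3rd pass */
    return SKETCH_KEEP_SLEEPING;       /* left alone; feeds the next-wakeup minimum */
}
#endif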
2426
2427#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2428/**
2429 * Timer callback for the EMT high-resolution wake-up timer.
2430 *
2431 * @param pTimer The timer handle.
2432 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2433 * @param iTick The current tick.
2434 */
2435static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2436{
2437 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2438 NOREF(pTimer); NOREF(iTick);
2439
2440 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2441 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2442 {
2443 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2444 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2445 }
2446 else
2447 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2448
2449 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2450 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2451}
2452#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2453
2454
2455/**
2456 * Halt the EMT thread.
2457 *
2458 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2459 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2460 * @param pGVM The global (ring-0) VM structure.
2461 * @param pGVCpu The global (ring-0) CPU structure of the calling
2462 * EMT.
2463 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2464 * @thread EMT(pGVCpu).
2465 */
2466GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2467{
2468 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2469 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2470 PGVMM pGVMM;
2471 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2472
2473 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2474 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2475
2476 /*
2477 * If we're doing early wake-ups, we must take the UsedList lock before we
2478 * start querying the current time.
2479 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2480 */
2481 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2482 if (fDoEarlyWakeUps)
2483 {
2484 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2485 }
2486
2487 /* GIP hack: We might frequently be sleeping for short intervals where the
2488 difference between GIP and system time matters on systems with high resolution
2489 system time. So, convert the input from GIP to system time in that case. */
2490 Assert(ASMGetFlags() & X86_EFL_IF);
2491 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2492 const uint64_t u64NowGip = RTTimeNanoTS();
2493
2494 if (fDoEarlyWakeUps)
2495 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2496
2497 /*
2498 * Go to sleep if we must...
2499 * Cap the sleep time to 1 second to be on the safe side.
2500 */
2501 int rc;
2502 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2503 if ( u64NowGip < u64ExpireGipTime
2504 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2505 ? pGVMM->nsMinSleepCompany
2506 : pGVMM->nsMinSleepAlone)
2507#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2508 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2509#endif
2510 )
2511 )
2512 {
2513 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2514 if (cNsInterval > RT_NS_1SEC)
2515 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2516 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2517 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2518 if (fDoEarlyWakeUps)
2519 {
2520 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2521 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2522 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2523 }
2524
2525#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2526 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2527 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2528 {
2529 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2530 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2531 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2532 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2533 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2534 }
2535#endif
2536
2537 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2538 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2539 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2540
2541 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2542 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2543
2544#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2545 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2546 { /* likely */ }
2547 else
2548 {
2549 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2550 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2551 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2552 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2553 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2554 }
2555#endif
2556
2557 /* Reset the semaphore to try to prevent a few false wake-ups. */
2558 if (rc == VINF_SUCCESS)
2559 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2560 else if (rc == VERR_TIMEOUT)
2561 {
2562 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2563 rc = VINF_SUCCESS;
2564 }
2565 }
2566 else
2567 {
2568 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2569 if (fDoEarlyWakeUps)
2570 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2571 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2572 rc = VINF_SUCCESS;
2573 }
2574
2575 return rc;
2576}
2577
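/*
 * Illustrative sketch, not part of the original file: the wait above takes an
 * absolute nanosecond deadline, while the caller's expiry is in GIP time.
 * When the GIP clock reads ahead of the system clock, the raw GIP deadline
 * lies further out as measured by the system clock and thus cannot fire
 * early; otherwise the sleep interval is re-anchored on system time:
 */
#if 0 /* illustrative only */
static uint64_t sketchWaitDeadline(uint64_t u64ExpireGip, uint64_t u64NowGip, uint64_t u64NowSys)
{
    uint64_t const cNsInterval = u64ExpireGip - u64NowGip;  /* intended sleep duration */
    if (u64NowGip > u64NowSys)
        return u64ExpireGip;            /* GIP ahead: the GIP deadline is conservative as-is */
    return u64NowSys + cNsInterval;     /* GIP behind: the raw GIP deadline would fire early */
}
#endif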
2578
2579/**
2580 * Halt the EMT thread.
2581 *
2582 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2583 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2584 * @param pGVM The global (ring-0) VM structure.
2585 * @param idCpu The Virtual CPU ID of the calling EMT.
2586 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2587 * @thread EMT(idCpu).
2588 */
2589GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2590{
2591 PGVMM pGVMM;
2592 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2593 if (RT_SUCCESS(rc))
2594 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2595 return rc;
2596}
2597
2598
2599
2600/**
2601 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2602 * a sleeping EMT.
2603 *
2604 * @retval VINF_SUCCESS if successfully woken up.
2605 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2606 *
2607 * @param pGVM The global (ring-0) VM structure.
2608 * @param pGVCpu The global (ring-0) VCPU structure.
2609 */
2610DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2611{
2612 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2613
2614 /*
2615 * Signal the semaphore regardless of whether the EMT is currently blocked on it.
2616 *
2617 * The reason for this is that there is absolutely no way we can be 100%
2618 * certain that it isn't *about* to go to sleep on it and just got
2619 * delayed a bit en route. So, we will always signal the semaphore when
2620 * the EMT is flagged as halted in the VMM.
2621 */
2622/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2623 int rc;
2624 if (pGVCpu->gvmm.s.u64HaltExpire)
2625 {
2626 rc = VINF_SUCCESS;
2627 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2628 }
2629 else
2630 {
2631 rc = VINF_GVM_NOT_BLOCKED;
2632 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2633 }
2634
2635 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2636 AssertRC(rc2);
2637
2638 return rc;
2639}
2640
2641
2642/**
2643 * Wakes up the halted EMT thread so it can service a pending request.
2644 *
2645 * @returns VBox status code.
2646 * @retval VINF_SUCCESS if successfully woken up.
2647 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2648 *
2649 * @param pGVM The global (ring-0) VM structure.
2650 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2651 * @param fTakeUsedLock Take the used lock or not
2652 * @thread Any but EMT(idCpu).
2653 */
2654GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2655{
2656 /*
2657 * Validate input and take the UsedLock.
2658 */
2659 PGVMM pGVMM;
2660 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2661 if (RT_SUCCESS(rc))
2662 {
2663 if (idCpu < pGVM->cCpus)
2664 {
2665 /*
2666 * Do the actual job.
2667 */
2668 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2669
2670 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2671 {
2672 /*
2673 * While we're here, do a round of scheduling.
2674 */
2675 Assert(ASMGetFlags() & X86_EFL_IF);
2676 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2677 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2678 }
2679 }
2680 else
2681 rc = VERR_INVALID_CPU_ID;
2682
2683 if (fTakeUsedLock)
2684 {
2685 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2686 AssertRC(rc2);
2687 }
2688 }
2689
2690 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2691 return rc;
2692}
2693
2694
2695/**
2696 * Wakes up the halted EMT thread so it can service a pending request.
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS if successfully woken up.
2700 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2701 *
2702 * @param pGVM The global (ring-0) VM structure.
2703 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2704 * @thread Any but EMT(idCpu).
2705 */
2706GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2707{
2708 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2709}
2710
2711
2712/**
2713 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2714 * parameter and no used locking.
2715 *
2716 * @returns VBox status code.
2717 * @retval VINF_SUCCESS if successfully woken up.
2718 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2719 *
2720 * @param pGVM The global (ring-0) VM structure.
2721 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2722 * @thread Any but EMT(idCpu).
2723 * @deprecated Don't use in new code if possible! Use the GVM variant.
2724 */
2725GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2726{
2727 PGVMM pGVMM;
2728 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2729 if (RT_SUCCESS(rc))
2730 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2731 return rc;
2732}
2733
2734
2735/**
2736 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2737 * the Virtual CPU if it's still busy executing guest code.
2738 *
2739 * @returns VBox status code.
2740 * @retval VINF_SUCCESS if poked successfully.
2741 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2742 *
2743 * @param pGVM The global (ring-0) VM structure.
2744 * @param pVCpu The cross context virtual CPU structure.
2745 */
2746DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2747{
2748 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2749
2750 RTCPUID idHostCpu = pVCpu->idHostCpu;
2751 if ( idHostCpu == NIL_RTCPUID
2752 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2753 {
2754 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2755 return VINF_GVM_NOT_BUSY_IN_GC;
2756 }
2757
2758 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2759 RTMpPokeCpu(idHostCpu);
2760 return VINF_SUCCESS;
2761}
2762
2763
2764/**
2765 * Pokes an EMT if it's still busy running guest code.
2766 *
2767 * @returns VBox status code.
2768 * @retval VINF_SUCCESS if poked successfully.
2769 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2770 *
2771 * @param pGVM The global (ring-0) VM structure.
2772 * @param idCpu The ID of the virtual CPU to poke.
2773 * @param fTakeUsedLock Take the used lock or not
2774 */
2775GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2776{
2777 /*
2778 * Validate input and take the UsedLock.
2779 */
2780 PGVMM pGVMM;
2781 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2782 if (RT_SUCCESS(rc))
2783 {
2784 if (idCpu < pGVM->cCpus)
2785 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2786 else
2787 rc = VERR_INVALID_CPU_ID;
2788
2789 if (fTakeUsedLock)
2790 {
2791 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2792 AssertRC(rc2);
2793 }
2794 }
2795
2796 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2797 return rc;
2798}
2799
2800
2801/**
2802 * Pokes an EMT if it's still busy running guest code.
2803 *
2804 * @returns VBox status code.
2805 * @retval VINF_SUCCESS if poked successfully.
2806 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2807 *
2808 * @param pGVM The global (ring-0) VM structure.
2809 * @param idCpu The ID of the virtual CPU to poke.
2810 */
2811GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2812{
2813 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2814}
2815
2816
2817/**
2818 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2819 * used locking.
2820 *
2821 * @returns VBox status code.
2822 * @retval VINF_SUCCESS if poked successfully.
2823 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2824 *
2825 * @param pGVM The global (ring-0) VM structure.
2826 * @param idCpu The ID of the virtual CPU to poke.
2827 *
2828 * @deprecated Don't use in new code if possible! Use the GVM variant.
2829 */
2830GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2831{
2832 PGVMM pGVMM;
2833 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2834 if (RT_SUCCESS(rc))
2835 {
2836 if (idCpu < pGVM->cCpus)
2837 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2838 else
2839 rc = VERR_INVALID_CPU_ID;
2840 }
2841 return rc;
2842}
2843
2844
2845/**
2846 * Wakes up a set of halted EMT threads so they can service pending requests.
2847 *
2848 * @returns VBox status code, no informational stuff.
2849 *
2850 * @param pGVM The global (ring-0) VM structure.
2851 * @param pSleepSet The set of sleepers to wake up.
2852 * @param pPokeSet The set of CPUs to poke.
2853 */
2854GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2855{
2856 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2857 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2858 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2859
2860 /*
2861 * Validate input and take the UsedLock.
2862 */
2863 PGVMM pGVMM;
2864 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2865 if (RT_SUCCESS(rc))
2866 {
2867 rc = VINF_SUCCESS;
2868 VMCPUID idCpu = pGVM->cCpus;
2869 while (idCpu-- > 0)
2870 {
2871 /* Don't try to poke or wake up ourselves. */
2872 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2873 continue;
2874
2875 /* just ignore errors for now. */
2876 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2877 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2878 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2879 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2880 }
2881
2882 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2883 AssertRC(rc2);
2884 }
2885
2886 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2887 return rc;
2888}
2889
2890
2891/**
2892 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2893 *
2894 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2895 * @param pGVM The global (ring-0) VM structure.
2896 * @param pReq Pointer to the request packet.
2897 */
2898GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2899{
2900 /*
2901 * Validate input and pass it on.
2902 */
2903 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2904 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2905
2906 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2907}
2908
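/*
 * Illustrative sketch, not part of the original file: a caller fills in the
 * standard VMMR0 request header and the two CPU sets before shipping the
 * packet to ring-0 (the dispatch path itself is elided here).  The header
 * magic and the vmcpuset.h helpers are the usual ones:
 */
#if 0 /* illustrative only */
static void sketchPrepareWakeAndPokeReq(PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq,
                                        VMCPUID idCpuToWake, VMCPUID idCpuToPoke)
{
    pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    pReq->Hdr.cbReq    = sizeof(*pReq);          /* checked by the wrapper above */
    VMCPUSET_EMPTY(&pReq->SleepSet);
    VMCPUSET_EMPTY(&pReq->PokeSet);
    VMCPUSET_ADD(&pReq->SleepSet, idCpuToWake);  /* halted EMT: signal its halt event */
    VMCPUSET_ADD(&pReq->PokeSet,  idCpuToPoke);  /* EMT busy in guest code: IPI it */
}
#endif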
2909
2910
2911/**
2912 * Poll the scheduler to see if someone else should get a chance to run.
2913 *
2914 * This is a bit hackish and will not work too well if the machine is
2915 * under heavy load from non-VM processes.
2916 *
2917 * @returns VINF_SUCCESS if not yielded.
2918 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2919 * @param pGVM The global (ring-0) VM structure.
2920 * @param idCpu The Virtual CPU ID of the calling EMT.
2921 * @param fYield Whether to yield or not.
2922 * This is for when we're spinning in the halt loop.
2923 * @thread EMT(idCpu).
2924 */
2925GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2926{
2927 /*
2928 * Validate input.
2929 */
2930 PGVMM pGVMM;
2931 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2932 if (RT_SUCCESS(rc))
2933 {
2934 /*
2935 * We currently only implement helping with wake-ups (fYield = false), so don't
2936 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2937 */
2938 if (!fYield && pGVMM->fDoEarlyWakeUps)
2939 {
2940 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2941 pGVM->gvmm.s.StatsSched.cPollCalls++;
2942
2943 Assert(ASMGetFlags() & X86_EFL_IF);
2944 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2945
2946 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2947
2948 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2949 }
2950 /*
2951 * Not quite sure what we could do here...
2952 */
2953 else if (fYield)
2954 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2955 else
2956 rc = VINF_SUCCESS;
2957 }
2958
2959 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2960 return rc;
2961}
2962
2963
2964#ifdef GVMM_SCHED_WITH_PPT
2965/**
2966 * Timer callback for the periodic preemption timer.
2967 *
2968 * @param pTimer The timer handle.
2969 * @param pvUser Pointer to the per cpu structure.
2970 * @param iTick The current tick.
2971 */
2972static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2973{
2974 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2975 NOREF(pTimer); NOREF(iTick);
2976
2977 /*
2978 * Termination check
2979 */
2980 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2981 return;
2982
2983 /*
2984 * Do the housekeeping.
2985 */
2986 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2987
2988 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2989 {
2990 /*
2991 * Historicize the max frequency.
2992 */
2993 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2994 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2995 pCpu->Ppt.iTickHistorization = 0;
2996 pCpu->Ppt.uDesiredHz = 0;
2997
2998 /*
2999 * Check whether the timer frequency needs changing.
3000 */
3001 uint32_t uHistMaxHz = 0;
3002 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3003 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3004 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3005 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3006 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3007 else if (uHistMaxHz)
3008 {
3009 /*
3010 * Reprogram it.
3011 */
3012 pCpu->Ppt.cChanges++;
3013 pCpu->Ppt.iTickHistorization = 0;
3014 pCpu->Ppt.uTimerHz = uHistMaxHz;
3015 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3016 pCpu->Ppt.cNsInterval = cNsInterval;
3017 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3018 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3019 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3020 / cNsInterval;
3021 else
3022 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3023 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3024
3025 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3026 RTTimerChangeInterval(pTimer, cNsInterval);
3027 }
3028 else
3029 {
3030 /*
3031 * Stop it.
3032 */
3033 pCpu->Ppt.fStarted = false;
3034 pCpu->Ppt.uTimerHz = 0;
3035 pCpu->Ppt.cNsInterval = 0;
3036 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3037
3038 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3039 RTTimerStop(pTimer);
3040 }
3041 }
3042 else
3043 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3044}
3045#endif /* GVMM_SCHED_WITH_PPT */
3046
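/*
 * Illustrative arithmetic, not part of the original file: the callback above
 * converts the fixed historization window into a tick count for the current
 * timer interval.  Taking a 20 ms window as a worked assumption, a 1000 Hz
 * timer (1 000 000 ns interval) yields (20 000 000 + 10 000 000 - 1) /
 * 1 000 000 = 29 ticks per history slot:
 */
#if 0 /* illustrative only */
static uint32_t sketchTicksPerHistSlot(uint32_t cNsInterval)
{
    if (cNsInterval >= GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
        return 1;                                   /* slow timer: one tick per slot */
    return (GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
            + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
         / cNsInterval;                             /* e.g. 1 ms interval -> 29 ticks */
}
#endif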
3047
3048/**
3049 * Updates the periodic preemption timer for the calling CPU.
3050 *
3051 * The caller must have disabled preemption!
3052 * The caller must check that the host can do high resolution timers.
3053 *
3054 * @param pGVM The global (ring-0) VM structure.
3055 * @param idHostCpu The current host CPU id.
3056 * @param uHz The desired frequency.
3057 */
3058GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3059{
3060 NOREF(pGVM);
3061#ifdef GVMM_SCHED_WITH_PPT
3062 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3063 Assert(RTTimerCanDoHighResolution());
3064
3065 /*
3066 * Resolve the per CPU data.
3067 */
3068 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3069 PGVMM pGVMM = g_pGVMM;
3070 if ( !RT_VALID_PTR(pGVMM)
3071 || pGVMM->u32Magic != GVMM_MAGIC)
3072 return;
3073 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3074 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3075 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3076 && pCpu->idCpu == idHostCpu,
3077 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3078
3079 /*
3080 * Check whether we need to do anything about the timer.
3081 * We have to be a little bit careful since we might be racing the timer
3082 * callback here.
3083 */
3084 if (uHz > 16384)
3085 uHz = 16384; /** @todo add a query method for this! */
3086 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3087 && uHz >= pCpu->Ppt.uMinHz
3088 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3089 {
3090 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3091
3092 pCpu->Ppt.uDesiredHz = uHz;
3093 uint32_t cNsInterval = 0;
3094 if (!pCpu->Ppt.fStarted)
3095 {
3096 pCpu->Ppt.cStarts++;
3097 pCpu->Ppt.fStarted = true;
3098 pCpu->Ppt.fStarting = true;
3099 pCpu->Ppt.iTickHistorization = 0;
3100 pCpu->Ppt.uTimerHz = uHz;
3101 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3102 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3103 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3104 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3105 / cNsInterval;
3106 else
3107 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3108 }
3109
3110 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3111
3112 if (cNsInterval)
3113 {
3114 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3115 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3116 AssertRC(rc);
3117
3118 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3119 if (RT_FAILURE(rc))
3120 pCpu->Ppt.fStarted = false;
3121 pCpu->Ppt.fStarting = false;
3122 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3123 }
3124 }
3125#else /* !GVMM_SCHED_WITH_PPT */
3126 NOREF(idHostCpu); NOREF(uHz);
3127#endif /* !GVMM_SCHED_WITH_PPT */
3128}
3129
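/*
 * Illustrative sketch, not part of the original file: the caller must disable
 * preemption first, since the function indexes per-CPU data by idHostCpu.
 * The usual IPRT pattern:
 */
#if 0 /* illustrative only */
static void sketchUpdatePpt(PGVM pGVM, uint32_t uHz)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);          /* no CPU migration from here on */
    if (RTTimerCanDoHighResolution())
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uHz);
    RTThreadPreemptRestore(&PreemptState);
}
#endif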
3130
3131/**
3132 * Calls @a pfnCallback for each VM in the system.
3133 *
3134 * This will enumerate the VMs while holding the global VM used list lock in
3135 * shared mode. So, only suitable for simple work. If more expensive work
3136 * needs doing, a different approach must be taken as using this API would
3137 * otherwise block VM creation and destruction.
3138 *
3139 * @returns VBox status code.
3140 * @param pfnCallback The callback function.
3141 * @param pvUser User argument to the callback.
3142 */
3143GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3144{
3145 PGVMM pGVMM;
3146 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3147
3148 int rc = VINF_SUCCESS;
3149 GVMMR0_USED_SHARED_LOCK(pGVMM);
3150 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3151 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3152 i = pGVMM->aHandles[i].iNext, cLoops++)
3153 {
3154 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3155 if ( RT_VALID_PTR(pGVM)
3156 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3157 && pGVM->u32Magic == GVM_MAGIC)
3158 {
3159 rc = pfnCallback(pGVM, pvUser);
3160 if (rc != VINF_SUCCESS)
3161 break;
3162 }
3163
3164 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3165 }
3166 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3167 return rc;
3168}
3169
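/*
 * Illustrative sketch, not part of the original file: a minimal enumeration
 * callback that counts the EMTs of all VMs.  Returning anything other than
 * VINF_SUCCESS stops the enumeration, and the callback runs under the shared
 * used-list lock, so it must stay cheap:
 */
#if 0 /* illustrative only */
static DECLCALLBACK(int) sketchCountEmtsCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;  /* one EMT per virtual CPU */
    return VINF_SUCCESS;                 /* keep enumerating */
}

static uint32_t sketchCountAllEmts(void)
{
    uint32_t cEmts = 0;
    GVMMR0EnumVMs(sketchCountEmtsCallback, &cEmts);
    return cEmts;
}
#endif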
3170
3171/**
3172 * Retrieves the GVMM statistics visible to the caller.
3173 *
3174 * @returns VBox status code.
3175 *
3176 * @param pStats Where to put the statistics.
3177 * @param pSession The current session.
3178 * @param pGVM The GVM to obtain statistics for. Optional.
3179 */
3180GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3181{
3182 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3183
3184 /*
3185 * Validate input.
3186 */
3187 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3188 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3189 pStats->cVMs = 0; /* (crash before taking the sem...) */
3190
3191 /*
3192 * Take the lock and get the VM statistics.
3193 */
3194 PGVMM pGVMM;
3195 if (pGVM)
3196 {
3197 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3198 if (RT_FAILURE(rc))
3199 return rc;
3200 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3201
3202 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3203 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3204 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3205 while (iCpu-- > 0)
3206 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3207 }
3208 else
3209 {
3210 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3211 RT_ZERO(pStats->SchedVM);
3212 RT_ZERO(pStats->aVCpus);
3213
3214 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3215 AssertRCReturn(rc, rc);
3216 }
3217
3218 /*
3219 * Enumerate the VMs and add the ones visible to the statistics.
3220 */
3221 pStats->cVMs = 0;
3222 pStats->cEMTs = 0;
3223 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3224
3225 for (unsigned i = pGVMM->iUsedHead;
3226 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3227 i = pGVMM->aHandles[i].iNext)
3228 {
3229 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3230 void *pvObj = pGVMM->aHandles[i].pvObj;
3231 if ( RT_VALID_PTR(pvObj)
3232 && RT_VALID_PTR(pOtherGVM)
3233 && pOtherGVM->u32Magic == GVM_MAGIC
3234 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3235 {
3236 pStats->cVMs++;
3237 pStats->cEMTs += pOtherGVM->cCpus;
3238
3239 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3240 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3241 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3242 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3243 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3244
3245 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3246 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3247 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3248
3249 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3250 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3251
3252 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3253 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3254 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3255 }
3256 }
3257
3258 /*
3259 * Copy out the per host CPU statistics.
3260 */
3261 uint32_t iDstCpu = 0;
3262 uint32_t cSrcCpus = pGVMM->cHostCpus;
3263 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3264 {
3265 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3266 {
3267 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3268 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3269#ifdef GVMM_SCHED_WITH_PPT
3270 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3271 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3272 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3273 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3274#else
3275 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3276 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3277 pStats->aHostCpus[iDstCpu].cChanges = 0;
3278 pStats->aHostCpus[iDstCpu].cStarts = 0;
3279#endif
3280 iDstCpu++;
3281 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3282 break;
3283 }
3284 }
3285 pStats->cHostCpus = iDstCpu;
3286
3287 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3288
3289 return VINF_SUCCESS;
3290}
3291
3292
3293/**
3294 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3295 *
3296 * @returns see GVMMR0QueryStatistics.
3297 * @param pGVM The global (ring-0) VM structure. Optional.
3298 * @param pReq Pointer to the request packet.
3299 * @param pSession The current session.
3300 */
3301GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3302{
3303 /*
3304 * Validate input and pass it on.
3305 */
3306 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3307 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3308 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3309
3310 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3311}
3312
3313
3314/**
3315 * Resets the specified GVMM statistics.
3316 *
3317 * @returns VBox status code.
3318 *
3319 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3320 * @param pSession The current session.
3321 * @param pGVM The GVM to reset statistics for. Optional.
3322 */
3323GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3324{
3325 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3326
3327 /*
3328 * Validate input.
3329 */
3330 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3331 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3332
3333 /*
3334 * Take the lock and get the VM statistics.
3335 */
3336 PGVMM pGVMM;
3337 if (pGVM)
3338 {
3339 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3340 if (RT_FAILURE(rc))
3341 return rc;
3342# define MAYBE_RESET_FIELD(field) \
3343 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3344 MAYBE_RESET_FIELD(cHaltCalls);
3345 MAYBE_RESET_FIELD(cHaltBlocking);
3346 MAYBE_RESET_FIELD(cHaltTimeouts);
3347 MAYBE_RESET_FIELD(cHaltNotBlocking);
3348 MAYBE_RESET_FIELD(cHaltWakeUps);
3349 MAYBE_RESET_FIELD(cWakeUpCalls);
3350 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3351 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3352 MAYBE_RESET_FIELD(cPokeCalls);
3353 MAYBE_RESET_FIELD(cPokeNotBusy);
3354 MAYBE_RESET_FIELD(cPollCalls);
3355 MAYBE_RESET_FIELD(cPollHalts);
3356 MAYBE_RESET_FIELD(cPollWakeUps);
3357# undef MAYBE_RESET_FIELD
3358 }
3359 else
3360 {
3361 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3362
3363 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3364 AssertRCReturn(rc, rc);
3365 }
3366
3367 /*
3368 * Enumerate the VMs and add the ones visible to the statistics.
3369 */
3370 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3371 {
3372 for (unsigned i = pGVMM->iUsedHead;
3373 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3374 i = pGVMM->aHandles[i].iNext)
3375 {
3376 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3377 void *pvObj = pGVMM->aHandles[i].pvObj;
3378 if ( RT_VALID_PTR(pvObj)
3379 && RT_VALID_PTR(pOtherGVM)
3380 && pOtherGVM->u32Magic == GVM_MAGIC
3381 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3382 {
3383# define MAYBE_RESET_FIELD(field) \
3384 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3385 MAYBE_RESET_FIELD(cHaltCalls);
3386 MAYBE_RESET_FIELD(cHaltBlocking);
3387 MAYBE_RESET_FIELD(cHaltTimeouts);
3388 MAYBE_RESET_FIELD(cHaltNotBlocking);
3389 MAYBE_RESET_FIELD(cHaltWakeUps);
3390 MAYBE_RESET_FIELD(cWakeUpCalls);
3391 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3392 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3393 MAYBE_RESET_FIELD(cPokeCalls);
3394 MAYBE_RESET_FIELD(cPokeNotBusy);
3395 MAYBE_RESET_FIELD(cPollCalls);
3396 MAYBE_RESET_FIELD(cPollHalts);
3397 MAYBE_RESET_FIELD(cPollWakeUps);
3398# undef MAYBE_RESET_FIELD
3399 }
3400 }
3401 }
3402
3403 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3404
3405 return VINF_SUCCESS;
3406}
3407
3408
3409/**
3410 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3411 *
3412 * @returns see GVMMR0ResetStatistics.
3413 * @param pGVM The global (ring-0) VM structure. Optional.
3414 * @param pReq Pointer to the request packet.
3415 * @param pSession The current session.
3416 */
3417GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3418{
3419 /*
3420 * Validate input and pass it on.
3421 */
3422 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3423 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3424 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3425
3426 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3427}
3428