VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53877

Last change on this file since 53877 was 53843, checked in by vboxsync, 10 years ago

HostDrivers/Support: Fixed invalid assertion.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 296.3 KB
Line 
1/* $Id: SUPDrv.c 53843 2015-01-16 06:25:54Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency by which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops until we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrent safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126
/* Compile-time sanity checks on the loop counts above: the primer loop count
 * must be below the read-time loop count, and the two together must be below
 * the total loop count. */
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
128AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
129
130/** @def VBOX_SVN_REV
131 * The makefile should define this if it can. Falls back to 0 when the build
 * did not supply a value. */
132#ifndef VBOX_SVN_REV
133# define VBOX_SVN_REV 0
134#endif
135
136#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
137# define DO_NOT_START_GIP
138#endif
139
140/*******************************************************************************
141* Internal Functions *
142*******************************************************************************/
143static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
144static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
145static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
146static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
147static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
148static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
149static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
150static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
151static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
152static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
153static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
154static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
155static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
156DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
157DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
158static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
159static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
160static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
161static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
162static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
163static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
164static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
165static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
166static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
167static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
168static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
169 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
170static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
171static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
172static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
173static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
174 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
175static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
176static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
177static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
178static int supdrvIOCtl_ResumeSuspendedKbds(void);
179
180
181/*******************************************************************************
182* Global Variables *
183*******************************************************************************/
184DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
185
186
187/**
188 * Array of the R0 SUP API.
 *
 * Each entry pairs a symbol name string with the address stored for it.
 *
 * The first ten entries (the SUPR0Abs* names) are initialized to (void *)0
 * here; supdrvInitDevExt() overwrites their pfn members at runtime using the
 * hard-coded indexes 0 thru 9.  Entries must therefore not be inserted before
 * that group, nor reordered within it.
 *
 * NOTE(review): the "SED: START", "SED: END" and "SED: DATA" markers look like
 * anchors for an external sed-based generator - confirm before reformatting
 * any of the entries.
189 */
190static SUPFUNC g_aFunctions[] =
191{
192/* SED: START */
193 /* name function */
194 /* Entries with absolute addresses determined at runtime, fixup
195 code makes ugly ASSUMPTIONS about the order here: */
196 { "SUPR0AbsIs64bit", (void *)0 },
197 { "SUPR0Abs64bitKernelCS", (void *)0 },
198 { "SUPR0Abs64bitKernelSS", (void *)0 },
199 { "SUPR0Abs64bitKernelDS", (void *)0 },
200 { "SUPR0AbsKernelCS", (void *)0 },
201 { "SUPR0AbsKernelSS", (void *)0 },
202 { "SUPR0AbsKernelDS", (void *)0 },
203 { "SUPR0AbsKernelES", (void *)0 },
204 { "SUPR0AbsKernelFS", (void *)0 },
205 { "SUPR0AbsKernelGS", (void *)0 },
206 /* Normal function pointers: */
207 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
208 { "SUPGetGIP", (void *)SUPGetGIP },
209 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
210 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
211 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
212 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
213 { "SUPR0ContFree", (void *)SUPR0ContFree },
214 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
215 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
216 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
217 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
218 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
219 { "SUPR0LockMem", (void *)SUPR0LockMem },
220 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
221 { "SUPR0LowFree", (void *)SUPR0LowFree },
222 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
223 { "SUPR0MemFree", (void *)SUPR0MemFree },
224 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
225 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
226 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
227 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
228 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
229 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
230 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
231 { "SUPR0PageFree", (void *)SUPR0PageFree },
232 { "SUPR0Printf", (void *)SUPR0Printf },
233 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
234 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
235 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
236 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
237 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
238 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
239 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
240 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
241 { "SUPSemEventClose", (void *)SUPSemEventClose },
242 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
243 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
244 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
245 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
246 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
247 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
248 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
249 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
250 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
251 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
252 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
253 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
254 { "SUPSemEventWait", (void *)SUPSemEventWait },
255 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
256 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
257 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
258
259 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
260 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
261 { "RTAssertMsg1", (void *)RTAssertMsg1 },
262 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
263 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
264 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
265 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
266 { "RTCrc32", (void *)RTCrc32 },
267 { "RTCrc32Finish", (void *)RTCrc32Finish },
268 { "RTCrc32Process", (void *)RTCrc32Process },
269 { "RTCrc32Start", (void *)RTCrc32Start },
270 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
271 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
272 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
273 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
274 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
275 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
276 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
277 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
278 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
279 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
280 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
281 { "RTLogPrintfV", (void *)RTLogPrintfV },
282 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
283 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
284 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
285 { "RTMemAllocTag", (void *)RTMemAllocTag },
286 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
287 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
288 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
289 { "RTMemDupExTag", (void *)RTMemDupExTag },
290 { "RTMemDupTag", (void *)RTMemDupTag },
291 { "RTMemFree", (void *)RTMemFree },
292 { "RTMemFreeEx", (void *)RTMemFreeEx },
293 { "RTMemReallocTag", (void *)RTMemReallocTag },
294 { "RTMpCpuId", (void *)RTMpCpuId },
295 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
296 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
297 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
298 { "RTMpGetCount", (void *)RTMpGetCount },
299 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
300 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
301 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
302 { "RTMpGetSet", (void *)RTMpGetSet },
303 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
304 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
305 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
306 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
307 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
308 { "RTMpOnAll", (void *)RTMpOnAll },
309 { "RTMpOnOthers", (void *)RTMpOnOthers },
310 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
311 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
312 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
313 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
314 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
315 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
316 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
317 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
318 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
319 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
320 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
321 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
322 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
323 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
324 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
325 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
326 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
327 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
328 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
329 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
330 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
331 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
332 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
333 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
334 { "RTProcSelf", (void *)RTProcSelf },
335 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
336 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
337 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
338 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
339 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
340 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
341 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
342 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
343 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
344 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
345 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
346 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
347 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
348 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
349 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
350 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
351 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
352 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
353 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
354 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
355 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
356 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
357 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
358 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
359 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
360 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
361 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
362 { "RTSemEventCreate", (void *)RTSemEventCreate },
363 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
364 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
365 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
366 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
367 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
368 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
369 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
370 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
371 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
372 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
373 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
374 { "RTSemEventSignal", (void *)RTSemEventSignal },
375 { "RTSemEventWait", (void *)RTSemEventWait },
376 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
377 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
378 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
379 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
380 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
381 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
382 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
383 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
384 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
385 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
386 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
387 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
388 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
389 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
390 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
391 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
392 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
393 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
394 { "RTStrCopy", (void *)RTStrCopy },
395 { "RTStrDupTag", (void *)RTStrDupTag },
396 { "RTStrFormat", (void *)RTStrFormat },
397 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
398 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
399 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
400 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
401 { "RTStrFormatV", (void *)RTStrFormatV },
402 { "RTStrFree", (void *)RTStrFree },
403 { "RTStrNCmp", (void *)RTStrNCmp },
404 { "RTStrPrintf", (void *)RTStrPrintf },
405 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
406 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
407 { "RTStrPrintfV", (void *)RTStrPrintfV },
408 { "RTThreadCreate", (void *)RTThreadCreate },
409 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
410 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
411 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
412 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
413 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
414 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
415 { "RTThreadGetName", (void *)RTThreadGetName },
416 { "RTThreadGetNative", (void *)RTThreadGetNative },
417 { "RTThreadGetType", (void *)RTThreadGetType },
418 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
419 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
420 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
421 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
422 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
423 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
424 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
425 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
426 { "RTThreadSelf", (void *)RTThreadSelf },
427 { "RTThreadSelfName", (void *)RTThreadSelfName },
428 { "RTThreadSleep", (void *)RTThreadSleep },
429 { "RTThreadUserReset", (void *)RTThreadUserReset },
430 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
431 { "RTThreadUserWait", (void *)RTThreadUserWait },
432 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
433 { "RTThreadWait", (void *)RTThreadWait },
434 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
435 { "RTThreadYield", (void *)RTThreadYield },
436 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
437 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
438 { "RTTimeNow", (void *)RTTimeNow },
439 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
440 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
441 { "RTTimerCreate", (void *)RTTimerCreate },
442 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
443 { "RTTimerDestroy", (void *)RTTimerDestroy },
444 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
445 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
446 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
447 { "RTTimerStart", (void *)RTTimerStart },
448 { "RTTimerStop", (void *)RTTimerStop },
449 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
450 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
451 { "RTUuidCompare", (void *)RTUuidCompare },
452 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
453 { "RTUuidFromStr", (void *)RTUuidFromStr },
454/* SED: END */
455};
456
457#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
458/**
459 * Drag in the rest of IPRT since we share it with the
460 * rest of the kernel modules on darwin.
 *
 * The array only exists to take the address of each listed function; the
 * list is NULL terminated.  Note that the preprocessor guard also enables
 * it on Solaris and FreeBSD, not just darwin.
461 */
462PFNRT g_apfnVBoxDrvIPRTDeps[] =
463{
464 /* VBoxNetAdp */
465 (PFNRT)RTRandBytes,
466 /* VBoxUSB */
467 (PFNRT)RTPathStripFilename,
468 NULL
469};
470#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
471
472
473/**
474 * Initializes the device extentsion structure.
475 *
476 * @returns IPRT status code.
477 * @param pDevExt The device extension to initialize.
478 * @param cbSession The size of the session structure. The size of
479 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
480 * defined because we're skipping the OS specific members
481 * then.
482 */
483int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
484{
485 int rc;
486
487#ifdef SUPDRV_WITH_RELEASE_LOGGER
488 /*
489 * Create the release log.
490 */
491 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
492 PRTLOGGER pRelLogger;
493 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
494 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
495 if (RT_SUCCESS(rc))
496 RTLogRelSetDefaultInstance(pRelLogger);
497 /** @todo Add native hook for getting logger config parameters and setting
498 * them. On linux we should use the module parameter stuff... */
499#endif
500
501 /*
502 * Initialize it.
503 */
504 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
505 pDevExt->Spinlock = NIL_RTSPINLOCK;
506 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
507 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
508 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
509 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
510 if (RT_SUCCESS(rc))
511 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
512 if (RT_SUCCESS(rc))
513 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
514
515 if (RT_SUCCESS(rc))
516#ifdef SUPDRV_USE_MUTEX_FOR_LDR
517 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
518#else
519 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
520#endif
521 if (RT_SUCCESS(rc))
522 {
523 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
524 if (RT_SUCCESS(rc))
525 {
526#ifdef SUPDRV_USE_MUTEX_FOR_LDR
527 rc = RTSemMutexCreate(&pDevExt->mtxGip);
528#else
529 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
530#endif
531 if (RT_SUCCESS(rc))
532 {
533 rc = supdrvGipCreate(pDevExt);
534 if (RT_SUCCESS(rc))
535 {
536 rc = supdrvTracerInit(pDevExt);
537 if (RT_SUCCESS(rc))
538 {
539 pDevExt->pLdrInitImage = NULL;
540 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
541 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
542 pDevExt->cbSession = (uint32_t)cbSession;
543
544 /*
545 * Fixup the absolute symbols.
546 *
547 * Because of the table indexing assumptions we'll have a little #ifdef orgy
548 * here rather than distributing this to OS specific files. At least for now.
549 */
550#ifdef RT_OS_DARWIN
551# if ARCH_BITS == 32
552 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
553 {
554 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
555 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
556 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
557 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
558 }
559 else
560 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
561 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
562 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
563 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
564 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
565 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
566 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
567# else /* 64-bit darwin: */
568 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
569 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
570 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
571 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
572 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
573 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
574 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
575 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
576 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
577 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
578
579# endif
580#else /* !RT_OS_DARWIN */
581# if ARCH_BITS == 64
582 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
583 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
584 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
585 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
586# else
587 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
588# endif
589 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
590 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
591 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
592 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
593 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
594 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
595#endif /* !RT_OS_DARWIN */
596 return VINF_SUCCESS;
597 }
598
599 supdrvGipDestroy(pDevExt);
600 }
601
602#ifdef SUPDRV_USE_MUTEX_FOR_GIP
603 RTSemMutexDestroy(pDevExt->mtxGip);
604 pDevExt->mtxGip = NIL_RTSEMMUTEX;
605#else
606 RTSemFastMutexDestroy(pDevExt->mtxGip);
607 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
608#endif
609 }
610 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
611 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
612 }
613#ifdef SUPDRV_USE_MUTEX_FOR_LDR
614 RTSemMutexDestroy(pDevExt->mtxLdr);
615 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
616#else
617 RTSemFastMutexDestroy(pDevExt->mtxLdr);
618 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
619#endif
620 }
621
622 RTSpinlockDestroy(pDevExt->Spinlock);
623 pDevExt->Spinlock = NIL_RTSPINLOCK;
624 RTSpinlockDestroy(pDevExt->hGipSpinlock);
625 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
626 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
627 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
628
629#ifdef SUPDRV_WITH_RELEASE_LOGGER
630 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
631 RTLogDestroy(RTLogSetDefaultInstance(NULL));
632#endif
633
634 return rc;
635}
636
637
638/**
639 * Delete the device extension (e.g. cleanup members).
640 *
641 * @param pDevExt The device extension to delete.
642 */
643void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
644{
645 PSUPDRVOBJ pObj;
646 PSUPDRVUSAGE pUsage;
647
648 /*
649 * Kill mutexes and spinlocks.
650 */
651#ifdef SUPDRV_USE_MUTEX_FOR_GIP
652 RTSemMutexDestroy(pDevExt->mtxGip);
653 pDevExt->mtxGip = NIL_RTSEMMUTEX;
654#else
655 RTSemFastMutexDestroy(pDevExt->mtxGip);
656 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
657#endif
658#ifdef SUPDRV_USE_MUTEX_FOR_LDR
659 RTSemMutexDestroy(pDevExt->mtxLdr);
660 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
661#else
662 RTSemFastMutexDestroy(pDevExt->mtxLdr);
663 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
664#endif
665 RTSpinlockDestroy(pDevExt->Spinlock);
666 pDevExt->Spinlock = NIL_RTSPINLOCK;
667 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
668 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
669 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
670 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
671
672 /*
673 * Free lists.
674 */
675 /* objects. */
676 pObj = pDevExt->pObjs;
677 Assert(!pObj); /* (can trigger on forced unloads) */
678 pDevExt->pObjs = NULL;
679 while (pObj)
680 {
681 void *pvFree = pObj;
682 pObj = pObj->pNext;
683 RTMemFree(pvFree);
684 }
685
686 /* usage records. */
687 pUsage = pDevExt->pUsageFree;
688 pDevExt->pUsageFree = NULL;
689 while (pUsage)
690 {
691 void *pvFree = pUsage;
692 pUsage = pUsage->pNext;
693 RTMemFree(pvFree);
694 }
695
696 /* kill the GIP. */
697 supdrvGipDestroy(pDevExt);
698 RTSpinlockDestroy(pDevExt->hGipSpinlock);
699 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
700
701 supdrvTracerTerm(pDevExt);
702
703#ifdef SUPDRV_WITH_RELEASE_LOGGER
704 /* destroy the loggers. */
705 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
706 RTLogDestroy(RTLogSetDefaultInstance(NULL));
707#endif
708}
709
710
711/**
712 * Create session.
713 *
714 * @returns IPRT status code.
715 * @param pDevExt Device extension.
716 * @param fUser Flag indicating whether this is a user or kernel
717 * session.
718 * @param fUnrestricted Unrestricted access (system) or restricted access
719 * (user)?
720 * @param ppSession Where to store the pointer to the session data.
721 */
722int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
723{
724 int rc;
725 PSUPDRVSESSION pSession;
726
727 if (!SUP_IS_DEVEXT_VALID(pDevExt))
728 return VERR_INVALID_PARAMETER;
729
730 /*
731 * Allocate memory for the session data.
732 */
733 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
734 if (pSession)
735 {
736 /* Initialize session data. */
737 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
738 if (!rc)
739 {
740 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
741 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
742 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
743 if (RT_SUCCESS(rc))
744 {
745 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
746 pSession->pDevExt = pDevExt;
747 pSession->u32Cookie = BIRD_INV;
748 pSession->fUnrestricted = fUnrestricted;
749 /*pSession->fInHashTable = false; */
750 pSession->cRefs = 1;
751 /*pSession->pCommonNextHash = NULL;
752 pSession->ppOsSessionPtr = NULL; */
753 if (fUser)
754 {
755 pSession->Process = RTProcSelf();
756 pSession->R0Process = RTR0ProcHandleSelf();
757 }
758 else
759 {
760 pSession->Process = NIL_RTPROCESS;
761 pSession->R0Process = NIL_RTR0PROCESS;
762 }
763 /*pSession->pLdrUsage = NULL;
764 pSession->pVM = NULL;
765 pSession->pUsage = NULL;
766 pSession->pGip = NULL;
767 pSession->fGipReferenced = false;
768 pSession->Bundle.cUsed = 0; */
769 pSession->Uid = NIL_RTUID;
770 pSession->Gid = NIL_RTGID;
771 /*pSession->uTracerData = 0;*/
772 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
773 RTListInit(&pSession->TpProviders);
774 /*pSession->cTpProviders = 0;*/
775 /*pSession->cTpProbesFiring = 0;*/
776 RTListInit(&pSession->TpUmods);
777 /*RT_ZERO(pSession->apTpLookupTable);*/
778
779 VBOXDRV_SESSION_CREATE(pSession, fUser);
780 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
781 return VINF_SUCCESS;
782 }
783
784 RTSpinlockDestroy(pSession->Spinlock);
785 }
786 RTMemFree(pSession);
787 *ppSession = NULL;
788 Log(("Failed to create spinlock, rc=%d!\n", rc));
789 }
790 else
791 rc = VERR_NO_MEMORY;
792
793 return rc;
794}
795
796
797/**
798 * Cleans up the session in the context of the process to which it belongs, the
799 * caller will free the session and the session spinlock.
800 *
801 * This should normally occur when the session is closed or as the process
802 * exits. Careful reference counting in the OS specfic code makes sure that
803 * there cannot be any races between process/handle cleanup callbacks and
804 * threads doing I/O control calls.
805 *
806 * @param pDevExt The device extension.
807 * @param pSession Session data.
808 */
809static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
810{
811 int rc;
812 PSUPDRVBUNDLE pBundle;
813 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
814
815 Assert(!pSession->fInHashTable);
816 Assert(!pSession->ppOsSessionPtr);
817 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
818 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
819
820 /*
821 * Remove logger instances related to this session.
822 */
823 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
824
825 /*
826 * Destroy the handle table.
827 */
828 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
829 AssertRC(rc);
830 pSession->hHandleTable = NIL_RTHANDLETABLE;
831
832 /*
833 * Release object references made in this session.
834 * In theory there should be noone racing us in this session.
835 */
836 Log2(("release objects - start\n"));
837 if (pSession->pUsage)
838 {
839 PSUPDRVUSAGE pUsage;
840 RTSpinlockAcquire(pDevExt->Spinlock);
841
842 while ((pUsage = pSession->pUsage) != NULL)
843 {
844 PSUPDRVOBJ pObj = pUsage->pObj;
845 pSession->pUsage = pUsage->pNext;
846
847 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
848 if (pUsage->cUsage < pObj->cUsage)
849 {
850 pObj->cUsage -= pUsage->cUsage;
851 RTSpinlockRelease(pDevExt->Spinlock);
852 }
853 else
854 {
855 /* Destroy the object and free the record. */
856 if (pDevExt->pObjs == pObj)
857 pDevExt->pObjs = pObj->pNext;
858 else
859 {
860 PSUPDRVOBJ pObjPrev;
861 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
862 if (pObjPrev->pNext == pObj)
863 {
864 pObjPrev->pNext = pObj->pNext;
865 break;
866 }
867 Assert(pObjPrev);
868 }
869 RTSpinlockRelease(pDevExt->Spinlock);
870
871 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
872 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
873 if (pObj->pfnDestructor)
874 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
875 RTMemFree(pObj);
876 }
877
878 /* free it and continue. */
879 RTMemFree(pUsage);
880
881 RTSpinlockAcquire(pDevExt->Spinlock);
882 }
883
884 RTSpinlockRelease(pDevExt->Spinlock);
885 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
886 }
887 Log2(("release objects - done\n"));
888
889 /*
890 * Do tracer cleanups related to this session.
891 */
892 Log2(("release tracer stuff - start\n"));
893 supdrvTracerCleanupSession(pDevExt, pSession);
894 Log2(("release tracer stuff - end\n"));
895
896 /*
897 * Release memory allocated in the session.
898 *
899 * We do not serialize this as we assume that the application will
900 * not allocated memory while closing the file handle object.
901 */
902 Log2(("freeing memory:\n"));
903 pBundle = &pSession->Bundle;
904 while (pBundle)
905 {
906 PSUPDRVBUNDLE pToFree;
907 unsigned i;
908
909 /*
910 * Check and unlock all entries in the bundle.
911 */
912 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
913 {
914 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
915 {
916 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
917 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
918 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
919 {
920 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
921 AssertRC(rc); /** @todo figure out how to handle this. */
922 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
923 }
924 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
925 AssertRC(rc); /** @todo figure out how to handle this. */
926 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
927 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
928 }
929 }
930
931 /*
932 * Advance and free previous bundle.
933 */
934 pToFree = pBundle;
935 pBundle = pBundle->pNext;
936
937 pToFree->pNext = NULL;
938 pToFree->cUsed = 0;
939 if (pToFree != &pSession->Bundle)
940 RTMemFree(pToFree);
941 }
942 Log2(("freeing memory - done\n"));
943
944 /*
945 * Deregister component factories.
946 */
947 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
948 Log2(("deregistering component factories:\n"));
949 if (pDevExt->pComponentFactoryHead)
950 {
951 PSUPDRVFACTORYREG pPrev = NULL;
952 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
953 while (pCur)
954 {
955 if (pCur->pSession == pSession)
956 {
957 /* unlink it */
958 PSUPDRVFACTORYREG pNext = pCur->pNext;
959 if (pPrev)
960 pPrev->pNext = pNext;
961 else
962 pDevExt->pComponentFactoryHead = pNext;
963
964 /* free it */
965 pCur->pNext = NULL;
966 pCur->pSession = NULL;
967 pCur->pFactory = NULL;
968 RTMemFree(pCur);
969
970 /* next */
971 pCur = pNext;
972 }
973 else
974 {
975 /* next */
976 pPrev = pCur;
977 pCur = pCur->pNext;
978 }
979 }
980 }
981 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
982 Log2(("deregistering component factories - done\n"));
983
984 /*
985 * Loaded images needs to be dereferenced and possibly freed up.
986 */
987 supdrvLdrLock(pDevExt);
988 Log2(("freeing images:\n"));
989 if (pSession->pLdrUsage)
990 {
991 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
992 pSession->pLdrUsage = NULL;
993 while (pUsage)
994 {
995 void *pvFree = pUsage;
996 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
997 if (pImage->cUsage > pUsage->cUsage)
998 pImage->cUsage -= pUsage->cUsage;
999 else
1000 supdrvLdrFree(pDevExt, pImage);
1001 pUsage->pImage = NULL;
1002 pUsage = pUsage->pNext;
1003 RTMemFree(pvFree);
1004 }
1005 }
1006 supdrvLdrUnlock(pDevExt);
1007 Log2(("freeing images - done\n"));
1008
1009 /*
1010 * Unmap the GIP.
1011 */
1012 Log2(("umapping GIP:\n"));
1013 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1014 {
1015 SUPR0GipUnmap(pSession);
1016 pSession->fGipReferenced = 0;
1017 }
1018 Log2(("umapping GIP - done\n"));
1019}
1020
1021
1022/**
1023 * Common code for freeing a session when the reference count reaches zero.
1024 *
1025 * @param pDevExt Device extension.
1026 * @param pSession Session data.
1027 * This data will be freed by this routine.
1028 */
1029static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1030{
1031 VBOXDRV_SESSION_CLOSE(pSession);
1032
1033 /*
1034 * Cleanup the session first.
1035 */
1036 supdrvCleanupSession(pDevExt, pSession);
1037 supdrvOSCleanupSession(pDevExt, pSession);
1038
1039 /*
1040 * Free the rest of the session stuff.
1041 */
1042 RTSpinlockDestroy(pSession->Spinlock);
1043 pSession->Spinlock = NIL_RTSPINLOCK;
1044 pSession->pDevExt = NULL;
1045 RTMemFree(pSession);
1046 LogFlow(("supdrvDestroySession: returns\n"));
1047}
1048
1049
1050/**
1051 * Inserts the session into the global hash table.
1052 *
1053 * @retval VINF_SUCCESS on success.
1054 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1055 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1056 * session (asserted).
1057 * @retval VERR_DUPLICATE if there is already a session for that pid.
1058 *
1059 * @param pDevExt The device extension.
1060 * @param pSession The session.
1061 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1062 * available and used. This will set to point to the
1063 * session while under the protection of the session
1064 * hash table spinlock. It will also be kept in
1065 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1066 * cleanup use.
1067 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1068 */
1069int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1070 void *pvUser)
1071{
1072 PSUPDRVSESSION pCur;
1073 unsigned iHash;
1074
1075 /*
1076 * Validate input.
1077 */
1078 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1079 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1080
1081 /*
1082 * Calculate the hash table index and acquire the spinlock.
1083 */
1084 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1085
1086 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1087
1088 /*
1089 * If there are a collisions, we need to carefully check if we got a
1090 * duplicate. There can only be one open session per process.
1091 */
1092 pCur = pDevExt->apSessionHashTab[iHash];
1093 if (pCur)
1094 {
1095 while (pCur && pCur->Process != pSession->Process)
1096 pCur = pCur->pCommonNextHash;
1097
1098 if (pCur)
1099 {
1100 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1101 if (pCur == pSession)
1102 {
1103 Assert(pSession->fInHashTable);
1104 AssertFailed();
1105 return VERR_WRONG_ORDER;
1106 }
1107 Assert(!pSession->fInHashTable);
1108 if (pCur->R0Process == pSession->R0Process)
1109 return VERR_RESOURCE_IN_USE;
1110 return VERR_DUPLICATE;
1111 }
1112 }
1113 Assert(!pSession->fInHashTable);
1114 Assert(!pSession->ppOsSessionPtr);
1115
1116 /*
1117 * Insert it, doing a callout to the OS specific code in case it has
1118 * anything it wishes to do while we're holding the spinlock.
1119 */
1120 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1121 pDevExt->apSessionHashTab[iHash] = pSession;
1122 pSession->fInHashTable = true;
1123 ASMAtomicIncS32(&pDevExt->cSessions);
1124
1125 pSession->ppOsSessionPtr = ppOsSessionPtr;
1126 if (ppOsSessionPtr)
1127 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1128
1129 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1130
1131 /*
1132 * Retain a reference for the pointer in the session table.
1133 */
1134 ASMAtomicIncU32(&pSession->cRefs);
1135
1136 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1137 return VINF_SUCCESS;
1138}
1139
1140
1141/**
1142 * Removes the session from the global hash table.
1143 *
1144 * @retval VINF_SUCCESS on success.
1145 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1146 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1147 * session (asserted).
1148 *
1149 * @param pDevExt The device extension.
1150 * @param pSession The session. The caller is expected to have a reference
1151 * to this so it won't croak on us when we release the hash
1152 * table reference.
1153 * @param pvUser OS specific context value for the
1154 * supdrvOSSessionHashTabInserted callback.
1155 */
1156int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1157{
1158 PSUPDRVSESSION pCur;
1159 unsigned iHash;
1160 int32_t cRefs;
1161
1162 /*
1163 * Validate input.
1164 */
1165 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1166 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1167
1168 /*
1169 * Calculate the hash table index and acquire the spinlock.
1170 */
1171 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1172
1173 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1174
1175 /*
1176 * Unlink it.
1177 */
1178 pCur = pDevExt->apSessionHashTab[iHash];
1179 if (pCur == pSession)
1180 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1181 else
1182 {
1183 PSUPDRVSESSION pPrev = pCur;
1184 while (pCur && pCur != pSession)
1185 {
1186 pPrev = pCur;
1187 pCur = pCur->pCommonNextHash;
1188 }
1189 if (pCur)
1190 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1191 else
1192 {
1193 Assert(!pSession->fInHashTable);
1194 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1195 return VERR_NOT_FOUND;
1196 }
1197 }
1198
1199 pSession->pCommonNextHash = NULL;
1200 pSession->fInHashTable = false;
1201
1202 ASMAtomicDecS32(&pDevExt->cSessions);
1203
1204 /*
1205 * Clear OS specific session pointer if available and do the OS callback.
1206 */
1207 if (pSession->ppOsSessionPtr)
1208 {
1209 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1210 pSession->ppOsSessionPtr = NULL;
1211 }
1212
1213 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1214
1215 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1216
1217 /*
1218 * Drop the reference the hash table had to the session. This shouldn't
1219 * be the last reference!
1220 */
1221 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1222 Assert(cRefs > 0 && cRefs < _1M);
1223 if (cRefs == 0)
1224 supdrvDestroySession(pDevExt, pSession);
1225
1226 return VINF_SUCCESS;
1227}
1228
1229
1230/**
1231 * Looks up the session for the current process in the global hash table or in
1232 * OS specific pointer.
1233 *
1234 * @returns Pointer to the session with a reference that the caller must
1235 * release. If no valid session was found, NULL is returned.
1236 *
1237 * @param pDevExt The device extension.
1238 * @param Process The process ID.
1239 * @param R0Process The ring-0 process handle.
1240 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1241 * this is used instead of the hash table. For
1242 * additional safety it must then be equal to the
1243 * SUPDRVSESSION::ppOsSessionPtr member.
1244 * This can be NULL even if the OS has a session
1245 * pointer.
1246 */
1247PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1248 PSUPDRVSESSION *ppOsSessionPtr)
1249{
1250 PSUPDRVSESSION pCur;
1251 unsigned iHash;
1252
1253 /*
1254 * Validate input.
1255 */
1256 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1257
1258 /*
1259 * Calculate the hash table index and acquire the spinlock.
1260 */
1261 iHash = SUPDRV_SESSION_HASH(Process);
1262
1263 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1264
1265 /*
1266 * If an OS session pointer is provided, always use it.
1267 */
1268 if (ppOsSessionPtr)
1269 {
1270 pCur = *ppOsSessionPtr;
1271 if ( pCur
1272 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1273 || pCur->Process != Process
1274 || pCur->R0Process != R0Process) )
1275 pCur = NULL;
1276 }
1277 else
1278 {
1279 /*
1280 * Otherwise, do the hash table lookup.
1281 */
1282 pCur = pDevExt->apSessionHashTab[iHash];
1283 while ( pCur
1284 && ( pCur->Process != Process
1285 || pCur->R0Process != R0Process) )
1286 pCur = pCur->pCommonNextHash;
1287 }
1288
1289 /*
1290 * Retain the session.
1291 */
1292 if (pCur)
1293 {
1294 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1295 NOREF(cRefs);
1296 Assert(cRefs > 1 && cRefs < _1M);
1297 }
1298
1299 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1300
1301 return pCur;
1302}
1303
1304
1305/**
1306 * Retain a session to make sure it doesn't go away while it is in use.
1307 *
1308 * @returns New reference count on success, UINT32_MAX on failure.
1309 * @param pSession Session data.
1310 */
1311uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1312{
1313 uint32_t cRefs;
1314 AssertPtrReturn(pSession, UINT32_MAX);
1315 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1316
1317 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1318 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1319 return cRefs;
1320}
1321
1322
1323/**
1324 * Releases a given session.
1325 *
1326 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1327 * @param pSession Session data.
1328 */
1329uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1330{
1331 uint32_t cRefs;
1332 AssertPtrReturn(pSession, UINT32_MAX);
1333 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1334
1335 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1336 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1337 if (cRefs == 0)
1338 supdrvDestroySession(pSession->pDevExt, pSession);
1339 return cRefs;
1340}
1341
1342
1343/**
1344 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1345 *
1346 * @returns IPRT status code, see SUPR0ObjAddRef.
1347 * @param hHandleTable The handle table handle. Ignored.
1348 * @param pvObj The object pointer.
1349 * @param pvCtx Context, the handle type. Ignored.
1350 * @param pvUser Session pointer.
1351 */
1352static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1353{
1354 NOREF(pvCtx);
1355 NOREF(hHandleTable);
1356 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1357}
1358
1359
1360/**
1361 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1362 *
1363 * @param hHandleTable The handle table handle. Ignored.
1364 * @param h The handle value. Ignored.
1365 * @param pvObj The object pointer.
1366 * @param pvCtx Context, the handle type. Ignored.
1367 * @param pvUser Session pointer.
1368 */
1369static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1370{
1371 NOREF(pvCtx);
1372 NOREF(h);
1373 NOREF(hHandleTable);
1374 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1375}
1376
1377
1378/**
1379 * Fast path I/O Control worker.
1380 *
1381 * @returns VBox status code that should be passed down to ring-3 unchanged.
1382 * @param uIOCtl Function number.
1383 * @param idCpu VMCPU id.
1384 * @param pDevExt Device extention.
1385 * @param pSession Session data.
1386 */
1387int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1388{
1389 /*
1390 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1391 */
1392 if (RT_LIKELY( RT_VALID_PTR(pSession)
1393 && pSession->pVM
1394 && pDevExt->pfnVMMR0EntryFast))
1395 {
1396 switch (uIOCtl)
1397 {
1398 case SUP_IOCTL_FAST_DO_RAW_RUN:
1399 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1400 break;
1401 case SUP_IOCTL_FAST_DO_HM_RUN:
1402 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1403 break;
1404 case SUP_IOCTL_FAST_DO_NOP:
1405 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1406 break;
1407 default:
1408 return VERR_INTERNAL_ERROR;
1409 }
1410 return VINF_SUCCESS;
1411 }
1412 return VERR_INTERNAL_ERROR;
1413}
1414
1415
/**
 * Helper for supdrvIOCtl.  Tells whether a string contains at least one
 * character out of a given set.
 *
 * This is a hand-rolled stand-in for strpbrk, which is not used because it is
 * not on the RedHat kABI white list, see http://www.kerneldrivers.org/RHEL5.
 *
 * @returns 1 if pszStr contains any of the characters in pszChars, 0 if not
 *          (which includes either string being empty).
 * @param   pszStr      The string to check.
 * @param   pszChars    The character set to look for.
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    const char *pszCur;
    for (pszCur = pszStr; *pszCur != '\0'; pszCur++)
    {
        /* Compare the current character against each member of the set. */
        const char *pszSet;
        for (pszSet = pszChars; *pszSet != '\0'; pszSet++)
            if (*pszSet == *pszCur)
                return 1;
    }
    return 0;
}
1439
1440
1441
1442/**
1443 * I/O Control inner worker (tracing reasons).
1444 *
1445 * @returns IPRT status code.
1446 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1447 *
1448 * @param uIOCtl Function number.
1449 * @param pDevExt Device extention.
1450 * @param pSession Session data.
1451 * @param pReqHdr The request header.
1452 */
1453static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1454{
1455 /*
1456 * Validation macros
1457 */
1458#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1459 do { \
1460 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1461 { \
1462 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1463 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1464 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1465 } \
1466 } while (0)
1467
1468#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1469
1470#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1471 do { \
1472 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1473 { \
1474 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1475 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1476 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1477 } \
1478 } while (0)
1479
1480#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1481 do { \
1482 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1483 { \
1484 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1485 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1486 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1487 } \
1488 } while (0)
1489
1490#define REQ_CHECK_EXPR(Name, expr) \
1491 do { \
1492 if (RT_UNLIKELY(!(expr))) \
1493 { \
1494 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1495 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1496 } \
1497 } while (0)
1498
1499#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1500 do { \
1501 if (RT_UNLIKELY(!(expr))) \
1502 { \
1503 OSDBGPRINT( fmt ); \
1504 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1505 } \
1506 } while (0)
1507
1508 /*
1509 * The switch.
1510 */
1511 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1512 {
1513 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1514 {
1515 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1516 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1517 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1518 {
1519 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1520 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1521 return 0;
1522 }
1523
1524#if 0
1525 /*
1526 * Call out to the OS specific code and let it do permission checks on the
1527 * client process.
1528 */
1529 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1530 {
1531 pReq->u.Out.u32Cookie = 0xffffffff;
1532 pReq->u.Out.u32SessionCookie = 0xffffffff;
1533 pReq->u.Out.u32SessionVersion = 0xffffffff;
1534 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1535 pReq->u.Out.pSession = NULL;
1536 pReq->u.Out.cFunctions = 0;
1537 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1538 return 0;
1539 }
1540#endif
1541
1542 /*
1543 * Match the version.
1544 * The current logic is very simple, match the major interface version.
1545 */
1546 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1547 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1548 {
1549 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1550 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1551 pReq->u.Out.u32Cookie = 0xffffffff;
1552 pReq->u.Out.u32SessionCookie = 0xffffffff;
1553 pReq->u.Out.u32SessionVersion = 0xffffffff;
1554 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1555 pReq->u.Out.pSession = NULL;
1556 pReq->u.Out.cFunctions = 0;
1557 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1558 return 0;
1559 }
1560
1561 /*
1562 * Fill in return data and be gone.
1563 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1564 * u32SessionVersion <= u32ReqVersion!
1565 */
1566 /** @todo Somehow validate the client and negotiate a secure cookie... */
1567 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1568 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1569 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1570 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1571 pReq->u.Out.pSession = pSession;
1572 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1573 pReq->Hdr.rc = VINF_SUCCESS;
1574 return 0;
1575 }
1576
1577 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1578 {
1579 /* validate */
1580 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1581 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1582
1583 /* execute */
1584 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1585 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1586 pReq->Hdr.rc = VINF_SUCCESS;
1587 return 0;
1588 }
1589
1590 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1591 {
1592 /* validate */
1593 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1594 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1595 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1596 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1597 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1598
1599 /* execute */
1600 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1601 if (RT_FAILURE(pReq->Hdr.rc))
1602 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1603 return 0;
1604 }
1605
1606 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1607 {
1608 /* validate */
1609 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1610 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1611
1612 /* execute */
1613 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1614 return 0;
1615 }
1616
1617 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1618 {
1619 /* validate */
1620 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1621 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1622
1623 /* execute */
1624 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1625 if (RT_FAILURE(pReq->Hdr.rc))
1626 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1627 return 0;
1628 }
1629
1630 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1631 {
1632 /* validate */
1633 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1634 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1635
1636 /* execute */
1637 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1638 return 0;
1639 }
1640
1641 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1642 {
1643 /* validate */
1644 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1645 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1646 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1647 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1648 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1649 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1650 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1651 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1652 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1653 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1654 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1655
1656 /* execute */
1657 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1658 return 0;
1659 }
1660
1661 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1662 {
1663 /* validate */
1664 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1665 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1666 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1668 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1669 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1670 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1671 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1672 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1673 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1674 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1675 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1676 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1677 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1678 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1679
1680 if (pReq->u.In.cSymbols)
1681 {
1682 uint32_t i;
1683 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1684 for (i = 0; i < pReq->u.In.cSymbols; i++)
1685 {
1686 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1687 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1688 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1689 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1690 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1691 pReq->u.In.cbStrTab - paSyms[i].offName),
1692 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1693 }
1694 }
1695
1696 /* execute */
1697 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1698 return 0;
1699 }
1700
1701 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1702 {
1703 /* validate */
1704 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1705 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1706
1707 /* execute */
1708 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1709 return 0;
1710 }
1711
1712 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1713 {
1714 /* validate */
1715 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1716 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1717 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1718
1719 /* execute */
1720 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1721 return 0;
1722 }
1723
1724 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1725 {
1726 /* validate */
1727 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1728 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1729 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1730
1731 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1732 {
1733 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1734
1735 /* execute */
1736 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1737 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1738 else
1739 pReq->Hdr.rc = VERR_WRONG_ORDER;
1740 }
1741 else
1742 {
1743 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1744 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1745 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1746 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1747 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1748
1749 /* execute */
1750 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1751 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1752 else
1753 pReq->Hdr.rc = VERR_WRONG_ORDER;
1754 }
1755
1756 if ( RT_FAILURE(pReq->Hdr.rc)
1757 && pReq->Hdr.rc != VERR_INTERRUPTED
1758 && pReq->Hdr.rc != VERR_TIMEOUT)
1759 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1760 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1761 else
1762 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1763 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1764 return 0;
1765 }
1766
1767 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1768 {
1769 /* validate */
1770 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1771 PSUPVMMR0REQHDR pVMMReq;
1772 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1773 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1774
1775 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1776 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1777 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1778 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1779 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1780
1781 /* execute */
1782 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1783 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1784 else
1785 pReq->Hdr.rc = VERR_WRONG_ORDER;
1786
1787 if ( RT_FAILURE(pReq->Hdr.rc)
1788 && pReq->Hdr.rc != VERR_INTERRUPTED
1789 && pReq->Hdr.rc != VERR_TIMEOUT)
1790 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1791 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1792 else
1793 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1794 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1795 return 0;
1796 }
1797
1798 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1799 {
1800 /* validate */
1801 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1802 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1803
1804 /* execute */
1805 pReq->Hdr.rc = VINF_SUCCESS;
1806 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1807 return 0;
1808 }
1809
1810 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1811 {
1812 /* validate */
1813 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1814 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1815 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1816
1817 /* execute */
1818 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1819 if (RT_FAILURE(pReq->Hdr.rc))
1820 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1821 return 0;
1822 }
1823
1824 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1825 {
1826 /* validate */
1827 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1828 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1829
1830 /* execute */
1831 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1832 return 0;
1833 }
1834
1835 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1836 {
1837 /* validate */
1838 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1839 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1840
1841 /* execute */
1842 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1843 if (RT_SUCCESS(pReq->Hdr.rc))
1844 pReq->u.Out.pGipR0 = pDevExt->pGip;
1845 return 0;
1846 }
1847
1848 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1849 {
1850 /* validate */
1851 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1852 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1853
1854 /* execute */
1855 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1856 return 0;
1857 }
1858
1859 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1860 {
1861 /* validate */
1862 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1863 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1864 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1865 || ( VALID_PTR(pReq->u.In.pVMR0)
1866 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1867 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1868 /* execute */
1869 pSession->pVM = pReq->u.In.pVMR0;
1870 pReq->Hdr.rc = VINF_SUCCESS;
1871 return 0;
1872 }
1873
1874 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1875 {
1876 /* validate */
1877 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1878 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1879 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1880 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1881 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1882 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1883 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1884 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1885 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1886
1887 /* execute */
1888 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1889 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1890 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1891 &pReq->u.Out.aPages[0]);
1892 if (RT_FAILURE(pReq->Hdr.rc))
1893 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1894 return 0;
1895 }
1896
1897 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1898 {
1899 /* validate */
1900 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1901 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1902 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1903 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1904 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1905 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1906
1907 /* execute */
1908 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1909 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1910 if (RT_FAILURE(pReq->Hdr.rc))
1911 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1912 return 0;
1913 }
1914
1915 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1916 {
1917 /* validate */
1918 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1919 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1920 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1921 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1922 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1923 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1924 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1925
1926 /* execute */
1927 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1928 return 0;
1929 }
1930
1931 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1932 {
1933 /* validate */
1934 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1935 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1936
1937 /* execute */
1938 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1939 return 0;
1940 }
1941
1942 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1943 {
1944 /* validate */
1945 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1946 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1947 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1948
1949 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1950 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1951 else
1952 {
1953 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1954 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1955 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1956 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1957 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1958 }
1959 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1960
1961 /* execute */
1962 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1963 return 0;
1964 }
1965
1966 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
1967 {
1968 /* validate */
1969 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
1970 size_t cbStrTab;
1971 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
1972 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
1973 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
1974 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
1975 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
1976 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
1977 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
1978 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
1979 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
1980 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
1981 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
1982
1983 /* execute */
1984 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
1985 return 0;
1986 }
1987
1988 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
1989 {
1990 /* validate */
1991 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
1992 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
1993 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
1994
1995 /* execute */
1996 switch (pReq->u.In.uType)
1997 {
1998 case SUP_SEM_TYPE_EVENT:
1999 {
2000 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2001 switch (pReq->u.In.uOp)
2002 {
2003 case SUPSEMOP2_WAIT_MS_REL:
2004 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2005 break;
2006 case SUPSEMOP2_WAIT_NS_ABS:
2007 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2008 break;
2009 case SUPSEMOP2_WAIT_NS_REL:
2010 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2011 break;
2012 case SUPSEMOP2_SIGNAL:
2013 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2014 break;
2015 case SUPSEMOP2_CLOSE:
2016 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2017 break;
2018 case SUPSEMOP2_RESET:
2019 default:
2020 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2021 break;
2022 }
2023 break;
2024 }
2025
2026 case SUP_SEM_TYPE_EVENT_MULTI:
2027 {
2028 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2029 switch (pReq->u.In.uOp)
2030 {
2031 case SUPSEMOP2_WAIT_MS_REL:
2032 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2033 break;
2034 case SUPSEMOP2_WAIT_NS_ABS:
2035 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2036 break;
2037 case SUPSEMOP2_WAIT_NS_REL:
2038 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2039 break;
2040 case SUPSEMOP2_SIGNAL:
2041 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2042 break;
2043 case SUPSEMOP2_CLOSE:
2044 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2045 break;
2046 case SUPSEMOP2_RESET:
2047 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2048 break;
2049 default:
2050 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2051 break;
2052 }
2053 break;
2054 }
2055
2056 default:
2057 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2058 break;
2059 }
2060 return 0;
2061 }
2062
2063 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2064 {
2065 /* validate */
2066 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2067 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2068 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2069
2070 /* execute */
2071 switch (pReq->u.In.uType)
2072 {
2073 case SUP_SEM_TYPE_EVENT:
2074 {
2075 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2076 switch (pReq->u.In.uOp)
2077 {
2078 case SUPSEMOP3_CREATE:
2079 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2080 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2081 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2082 break;
2083 case SUPSEMOP3_GET_RESOLUTION:
2084 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2085 pReq->Hdr.rc = VINF_SUCCESS;
2086 pReq->Hdr.cbOut = sizeof(*pReq);
2087 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2088 break;
2089 default:
2090 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2091 break;
2092 }
2093 break;
2094 }
2095
2096 case SUP_SEM_TYPE_EVENT_MULTI:
2097 {
2098 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2099 switch (pReq->u.In.uOp)
2100 {
2101 case SUPSEMOP3_CREATE:
2102 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2103 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2104 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2105 break;
2106 case SUPSEMOP3_GET_RESOLUTION:
2107 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2108 pReq->Hdr.rc = VINF_SUCCESS;
2109 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2110 break;
2111 default:
2112 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2113 break;
2114 }
2115 break;
2116 }
2117
2118 default:
2119 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2120 break;
2121 }
2122 return 0;
2123 }
2124
2125 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2126 {
2127 /* validate */
2128 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2129 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2130
2131 /* execute */
2132 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2133 if (RT_FAILURE(pReq->Hdr.rc))
2134 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2135 return 0;
2136 }
2137
2138 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2139 {
2140 /* validate */
2141 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2142 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2143
2144 /* execute */
2145 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2146 return 0;
2147 }
2148
2149 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2150 {
2151 /* validate */
2152 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2153
2154 /* execute */
2155 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2156 return 0;
2157 }
2158
2159 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2160 {
2161 /* validate */
2162 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2163 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2164
2165 /* execute */
2166 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2167 return 0;
2168 }
2169
2170 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2171 {
2172 /* validate */
2173 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2174 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2175 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2176 return VERR_INVALID_PARAMETER;
2177
2178 /* execute */
2179 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2180 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2181 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2182 pReq->u.In.szName, pReq->u.In.fFlags);
2183 return 0;
2184 }
2185
2186 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2187 {
2188 /* validate */
2189 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2190 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2191
2192 /* execute */
2193 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2194 return 0;
2195 }
2196
2197 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2198 {
2199 /* validate */
2200 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2201 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2202
2203 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2204 pReqHdr->rc = VINF_SUCCESS;
2205 return 0;
2206 }
2207
2208 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2209 {
2210 /* validate */
2211 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2212 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2213 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2214 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2215
2216 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2217 return 0;
2218 }
2219
2220 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2221 {
2222 /* validate */
2223 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2224
2225 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2226 return 0;
2227 }
2228
2229 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2230 {
2231 /* validate */
2232 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2233 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2234
2235 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2236 return 0;
2237 }
2238
2239 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2240 {
2241 /* validate */
2242 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2243 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2244
2245 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2246 return 0;
2247 }
2248
2249 default:
2250 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2251 break;
2252 }
2253 return VERR_GENERAL_FAILURE;
2254}
2255
2256
2257/**
2258 * I/O Control inner worker for the restricted operations.
2259 *
2260 * @returns IPRT status code.
2261 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2262 *
2263 * @param uIOCtl Function number.
2264 * @param pDevExt Device extention.
2265 * @param pSession Session data.
2266 * @param pReqHdr The request header.
2267 */
2268static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2269{
2270 /*
2271 * The switch.
2272 */
2273 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2274 {
2275 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2276 {
2277 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2278 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2279 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2280 {
2281 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2282 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2283 return 0;
2284 }
2285
2286 /*
2287 * Match the version.
2288 * The current logic is very simple, match the major interface version.
2289 */
2290 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2291 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2292 {
2293 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2294 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2295 pReq->u.Out.u32Cookie = 0xffffffff;
2296 pReq->u.Out.u32SessionCookie = 0xffffffff;
2297 pReq->u.Out.u32SessionVersion = 0xffffffff;
2298 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2299 pReq->u.Out.pSession = NULL;
2300 pReq->u.Out.cFunctions = 0;
2301 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2302 return 0;
2303 }
2304
2305 /*
2306 * Fill in return data and be gone.
2307 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2308 * u32SessionVersion <= u32ReqVersion!
2309 */
2310 /** @todo Somehow validate the client and negotiate a secure cookie... */
2311 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2312 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2313 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2314 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2315 pReq->u.Out.pSession = pSession;
2316 pReq->u.Out.cFunctions = 0;
2317 pReq->Hdr.rc = VINF_SUCCESS;
2318 return 0;
2319 }
2320
2321 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2322 {
2323 /* validate */
2324 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2325 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2326
2327 /* execute */
2328 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2329 if (RT_FAILURE(pReq->Hdr.rc))
2330 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2331 return 0;
2332 }
2333
2334 default:
2335 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2336 break;
2337 }
2338 return VERR_GENERAL_FAILURE;
2339}
2340
2341
2342/**
2343 * I/O Control worker.
2344 *
2345 * @returns IPRT status code.
2346 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2347 *
2348 * @param uIOCtl Function number.
2349 * @param pDevExt Device extention.
2350 * @param pSession Session data.
2351 * @param pReqHdr The request header.
2352 */
2353int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2354{
2355 int rc;
2356 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2357
2358 /*
2359 * Validate the request.
2360 */
2361 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2362 {
2363 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2364 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2365 return VERR_INVALID_PARAMETER;
2366 }
2367 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2368 || pReqHdr->cbIn < sizeof(*pReqHdr)
2369 || pReqHdr->cbIn > cbReq
2370 || pReqHdr->cbOut < sizeof(*pReqHdr)
2371 || pReqHdr->cbOut > cbReq))
2372 {
2373 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2374 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2375 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2376 return VERR_INVALID_PARAMETER;
2377 }
2378 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2379 {
2380 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2381 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2382 return VERR_INVALID_PARAMETER;
2383 }
2384 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2385 {
2386 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2387 {
2388 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2389 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2390 return VERR_INVALID_PARAMETER;
2391 }
2392 }
2393 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2394 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2395 {
2396 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2397 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2398 return VERR_INVALID_PARAMETER;
2399 }
2400
2401 /*
2402 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2403 */
2404 if (pSession->fUnrestricted)
2405 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2406 else
2407 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2408
2409 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2410 return rc;
2411}
2412
2413
2414/**
2415 * Inter-Driver Communication (IDC) worker.
2416 *
2417 * @returns VBox status code.
2418 * @retval VINF_SUCCESS on success.
2419 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2420 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2421 *
2422 * @param uReq The request (function) code.
2423 * @param pDevExt Device extention.
2424 * @param pSession Session data.
2425 * @param pReqHdr The request header.
2426 */
2427int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2428{
2429 /*
2430 * The OS specific code has already validated the pSession
2431 * pointer, and the request size being greater or equal to
2432 * size of the header.
2433 *
2434 * So, just check that pSession is a kernel context session.
2435 */
2436 if (RT_UNLIKELY( pSession
2437 && pSession->R0Process != NIL_RTR0PROCESS))
2438 return VERR_INVALID_PARAMETER;
2439
2440/*
2441 * Validation macro.
2442 */
2443#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2444 do { \
2445 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2446 { \
2447 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2448 (long)pReqHdr->cb, (long)(cbExpect))); \
2449 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2450 } \
2451 } while (0)
2452
2453 switch (uReq)
2454 {
2455 case SUPDRV_IDC_REQ_CONNECT:
2456 {
2457 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2458 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2459
2460 /*
2461 * Validate the cookie and other input.
2462 */
2463 if (pReq->Hdr.pSession != NULL)
2464 {
2465 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2466 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2467 }
2468 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2469 {
2470 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2471 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2472 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2473 }
2474 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2475 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2476 {
2477 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2478 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2479 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2480 }
2481 if (pSession != NULL)
2482 {
2483 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2484 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2485 }
2486
2487 /*
2488 * Match the version.
2489 * The current logic is very simple, match the major interface version.
2490 */
2491 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2492 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2493 {
2494 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2495 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2496 pReq->u.Out.pSession = NULL;
2497 pReq->u.Out.uSessionVersion = 0xffffffff;
2498 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2499 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2500 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2501 return VINF_SUCCESS;
2502 }
2503
2504 pReq->u.Out.pSession = NULL;
2505 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2506 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2507 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2508
2509 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2510 if (RT_FAILURE(pReq->Hdr.rc))
2511 {
2512 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2513 return VINF_SUCCESS;
2514 }
2515
2516 pReq->u.Out.pSession = pSession;
2517 pReq->Hdr.pSession = pSession;
2518
2519 return VINF_SUCCESS;
2520 }
2521
2522 case SUPDRV_IDC_REQ_DISCONNECT:
2523 {
2524 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2525
2526 supdrvSessionRelease(pSession);
2527 return pReqHdr->rc = VINF_SUCCESS;
2528 }
2529
2530 case SUPDRV_IDC_REQ_GET_SYMBOL:
2531 {
2532 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2533 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2534
2535 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2536 return VINF_SUCCESS;
2537 }
2538
2539 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2540 {
2541 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2542 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2543
2544 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2545 return VINF_SUCCESS;
2546 }
2547
2548 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2549 {
2550 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2551 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2552
2553 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2554 return VINF_SUCCESS;
2555 }
2556
2557 default:
2558 Log(("Unknown IDC %#lx\n", (long)uReq));
2559 break;
2560 }
2561
2562#undef REQ_CHECK_IDC_SIZE
2563 return VERR_NOT_SUPPORTED;
2564}
2565
2566
2567/**
2568 * Register a object for reference counting.
2569 * The object is registered with one reference in the specified session.
2570 *
2571 * @returns Unique identifier on success (pointer).
2572 * All future reference must use this identifier.
2573 * @returns NULL on failure.
2574 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2575 * @param pvUser1 The first user argument.
2576 * @param pvUser2 The second user argument.
2577 */
2578SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2579{
2580 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2581 PSUPDRVOBJ pObj;
2582 PSUPDRVUSAGE pUsage;
2583
2584 /*
2585 * Validate the input.
2586 */
2587 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2588 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2589 AssertPtrReturn(pfnDestructor, NULL);
2590
2591 /*
2592 * Allocate and initialize the object.
2593 */
2594 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2595 if (!pObj)
2596 return NULL;
2597 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2598 pObj->enmType = enmType;
2599 pObj->pNext = NULL;
2600 pObj->cUsage = 1;
2601 pObj->pfnDestructor = pfnDestructor;
2602 pObj->pvUser1 = pvUser1;
2603 pObj->pvUser2 = pvUser2;
2604 pObj->CreatorUid = pSession->Uid;
2605 pObj->CreatorGid = pSession->Gid;
2606 pObj->CreatorProcess= pSession->Process;
2607 supdrvOSObjInitCreator(pObj, pSession);
2608
2609 /*
2610 * Allocate the usage record.
2611 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2612 */
2613 RTSpinlockAcquire(pDevExt->Spinlock);
2614
2615 pUsage = pDevExt->pUsageFree;
2616 if (pUsage)
2617 pDevExt->pUsageFree = pUsage->pNext;
2618 else
2619 {
2620 RTSpinlockRelease(pDevExt->Spinlock);
2621 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2622 if (!pUsage)
2623 {
2624 RTMemFree(pObj);
2625 return NULL;
2626 }
2627 RTSpinlockAcquire(pDevExt->Spinlock);
2628 }
2629
2630 /*
2631 * Insert the object and create the session usage record.
2632 */
2633 /* The object. */
2634 pObj->pNext = pDevExt->pObjs;
2635 pDevExt->pObjs = pObj;
2636
2637 /* The session record. */
2638 pUsage->cUsage = 1;
2639 pUsage->pObj = pObj;
2640 pUsage->pNext = pSession->pUsage;
2641 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2642 pSession->pUsage = pUsage;
2643
2644 RTSpinlockRelease(pDevExt->Spinlock);
2645
2646 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2647 return pObj;
2648}
2649
2650
2651/**
2652 * Increment the reference counter for the object associating the reference
2653 * with the specified session.
2654 *
2655 * @returns IPRT status code.
2656 * @param pvObj The identifier returned by SUPR0ObjRegister().
2657 * @param pSession The session which is referencing the object.
2658 *
2659 * @remarks The caller should not own any spinlocks and must carefully protect
2660 * itself against potential race with the destructor so freed memory
2661 * isn't accessed here.
2662 */
2663SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2664{
2665 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2666}
2667
2668
2669/**
2670 * Increment the reference counter for the object associating the reference
2671 * with the specified session.
2672 *
2673 * @returns IPRT status code.
2674 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2675 * couldn't be allocated. (If you see this you're not doing the right
2676 * thing and it won't ever work reliably.)
2677 *
2678 * @param pvObj The identifier returned by SUPR0ObjRegister().
2679 * @param pSession The session which is referencing the object.
2680 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2681 * first reference to an object in a session with this
2682 * argument set.
2683 *
2684 * @remarks The caller should not own any spinlocks and must carefully protect
2685 * itself against potential race with the destructor so freed memory
2686 * isn't accessed here.
2687 */
2688SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2689{
2690 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2691 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2692 int rc = VINF_SUCCESS;
2693 PSUPDRVUSAGE pUsagePre;
2694 PSUPDRVUSAGE pUsage;
2695
2696 /*
2697 * Validate the input.
2698 * Be ready for the destruction race (someone might be stuck in the
2699 * destructor waiting a lock we own).
2700 */
2701 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2702 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2703 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2704 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2705 VERR_INVALID_PARAMETER);
2706
2707 RTSpinlockAcquire(pDevExt->Spinlock);
2708
2709 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2710 {
2711 RTSpinlockRelease(pDevExt->Spinlock);
2712
2713 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2714 return VERR_WRONG_ORDER;
2715 }
2716
2717 /*
2718 * Preallocate the usage record if we can.
2719 */
2720 pUsagePre = pDevExt->pUsageFree;
2721 if (pUsagePre)
2722 pDevExt->pUsageFree = pUsagePre->pNext;
2723 else if (!fNoBlocking)
2724 {
2725 RTSpinlockRelease(pDevExt->Spinlock);
2726 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2727 if (!pUsagePre)
2728 return VERR_NO_MEMORY;
2729
2730 RTSpinlockAcquire(pDevExt->Spinlock);
2731 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2732 {
2733 RTSpinlockRelease(pDevExt->Spinlock);
2734
2735 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2736 return VERR_WRONG_ORDER;
2737 }
2738 }
2739
2740 /*
2741 * Reference the object.
2742 */
2743 pObj->cUsage++;
2744
2745 /*
2746 * Look for the session record.
2747 */
2748 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2749 {
2750 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2751 if (pUsage->pObj == pObj)
2752 break;
2753 }
2754 if (pUsage)
2755 pUsage->cUsage++;
2756 else if (pUsagePre)
2757 {
2758 /* create a new session record. */
2759 pUsagePre->cUsage = 1;
2760 pUsagePre->pObj = pObj;
2761 pUsagePre->pNext = pSession->pUsage;
2762 pSession->pUsage = pUsagePre;
2763 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2764
2765 pUsagePre = NULL;
2766 }
2767 else
2768 {
2769 pObj->cUsage--;
2770 rc = VERR_TRY_AGAIN;
2771 }
2772
2773 /*
2774 * Put any unused usage record into the free list..
2775 */
2776 if (pUsagePre)
2777 {
2778 pUsagePre->pNext = pDevExt->pUsageFree;
2779 pDevExt->pUsageFree = pUsagePre;
2780 }
2781
2782 RTSpinlockRelease(pDevExt->Spinlock);
2783
2784 return rc;
2785}
2786
2787
2788/**
2789 * Decrement / destroy a reference counter record for an object.
2790 *
2791 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2792 *
2793 * @returns IPRT status code.
2794 * @retval VINF_SUCCESS if not destroyed.
2795 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2796 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2797 * string builds.
2798 *
2799 * @param pvObj The identifier returned by SUPR0ObjRegister().
2800 * @param pSession The session which is referencing the object.
2801 */
2802SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2803{
2804 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2805 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2806 int rc = VERR_INVALID_PARAMETER;
2807 PSUPDRVUSAGE pUsage;
2808 PSUPDRVUSAGE pUsagePrev;
2809
2810 /*
2811 * Validate the input.
2812 */
2813 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2814 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2815 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2816 VERR_INVALID_PARAMETER);
2817
2818 /*
2819 * Acquire the spinlock and look for the usage record.
2820 */
2821 RTSpinlockAcquire(pDevExt->Spinlock);
2822
2823 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2824 pUsage;
2825 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2826 {
2827 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2828 if (pUsage->pObj == pObj)
2829 {
2830 rc = VINF_SUCCESS;
2831 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2832 if (pUsage->cUsage > 1)
2833 {
2834 pObj->cUsage--;
2835 pUsage->cUsage--;
2836 }
2837 else
2838 {
2839 /*
2840 * Free the session record.
2841 */
2842 if (pUsagePrev)
2843 pUsagePrev->pNext = pUsage->pNext;
2844 else
2845 pSession->pUsage = pUsage->pNext;
2846 pUsage->pNext = pDevExt->pUsageFree;
2847 pDevExt->pUsageFree = pUsage;
2848
2849 /* What about the object? */
2850 if (pObj->cUsage > 1)
2851 pObj->cUsage--;
2852 else
2853 {
2854 /*
2855 * Object is to be destroyed, unlink it.
2856 */
2857 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2858 rc = VINF_OBJECT_DESTROYED;
2859 if (pDevExt->pObjs == pObj)
2860 pDevExt->pObjs = pObj->pNext;
2861 else
2862 {
2863 PSUPDRVOBJ pObjPrev;
2864 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2865 if (pObjPrev->pNext == pObj)
2866 {
2867 pObjPrev->pNext = pObj->pNext;
2868 break;
2869 }
2870 Assert(pObjPrev);
2871 }
2872 }
2873 }
2874 break;
2875 }
2876 }
2877
2878 RTSpinlockRelease(pDevExt->Spinlock);
2879
2880 /*
2881 * Call the destructor and free the object if required.
2882 */
2883 if (rc == VINF_OBJECT_DESTROYED)
2884 {
2885 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2886 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2887 if (pObj->pfnDestructor)
2888 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2889 RTMemFree(pObj);
2890 }
2891
2892 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2893 return rc;
2894}
2895
2896
2897/**
2898 * Verifies that the current process can access the specified object.
2899 *
2900 * @returns The following IPRT status code:
2901 * @retval VINF_SUCCESS if access was granted.
2902 * @retval VERR_PERMISSION_DENIED if denied access.
2903 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2904 *
2905 * @param pvObj The identifier returned by SUPR0ObjRegister().
2906 * @param pSession The session which wishes to access the object.
2907 * @param pszObjName Object string name. This is optional and depends on the object type.
2908 *
2909 * @remark The caller is responsible for making sure the object isn't removed while
2910 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2911 */
2912SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2913{
2914 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2915 int rc;
2916
2917 /*
2918 * Validate the input.
2919 */
2920 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2921 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2922 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2923 VERR_INVALID_PARAMETER);
2924
2925 /*
2926 * Check access. (returns true if a decision has been made.)
2927 */
2928 rc = VERR_INTERNAL_ERROR;
2929 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2930 return rc;
2931
2932 /*
2933 * Default policy is to allow the user to access his own
2934 * stuff but nothing else.
2935 */
2936 if (pObj->CreatorUid == pSession->Uid)
2937 return VINF_SUCCESS;
2938 return VERR_PERMISSION_DENIED;
2939}
2940
2941
2942/**
2943 * Lock pages.
2944 *
2945 * @returns IPRT status code.
2946 * @param pSession Session to which the locked memory should be associated.
2947 * @param pvR3 Start of the memory range to lock.
2948 * This must be page aligned.
2949 * @param cPages Number of pages to lock.
2950 * @param paPages Where to put the physical addresses of locked memory.
2951 */
2952SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2953{
2954 int rc;
2955 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2956 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2957 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2958
2959 /*
2960 * Verify input.
2961 */
2962 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2963 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2964 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2965 || !pvR3)
2966 {
2967 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2968 return VERR_INVALID_PARAMETER;
2969 }
2970
2971 /*
2972 * Let IPRT do the job.
2973 */
2974 Mem.eType = MEMREF_TYPE_LOCKED;
2975 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2976 if (RT_SUCCESS(rc))
2977 {
2978 uint32_t iPage = cPages;
2979 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
2980 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
2981
2982 while (iPage-- > 0)
2983 {
2984 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
2985 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
2986 {
2987 AssertMsgFailed(("iPage=%d\n", iPage));
2988 rc = VERR_INTERNAL_ERROR;
2989 break;
2990 }
2991 }
2992 if (RT_SUCCESS(rc))
2993 rc = supdrvMemAdd(&Mem, pSession);
2994 if (RT_FAILURE(rc))
2995 {
2996 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
2997 AssertRC(rc2);
2998 }
2999 }
3000
3001 return rc;
3002}
3003
3004
3005/**
3006 * Unlocks the memory pointed to by pv.
3007 *
3008 * @returns IPRT status code.
3009 * @param pSession Session to which the memory was locked.
3010 * @param pvR3 Memory to unlock.
3011 */
3012SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3013{
3014 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3015 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3016 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3017}
3018
3019
3020/**
3021 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3022 * backing.
3023 *
3024 * @returns IPRT status code.
3025 * @param pSession Session data.
3026 * @param cPages Number of pages to allocate.
3027 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3028 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3029 * @param pHCPhys Where to put the physical address of allocated memory.
3030 */
3031SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3032{
3033 int rc;
3034 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3035 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3036
3037 /*
3038 * Validate input.
3039 */
3040 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3041 if (!ppvR3 || !ppvR0 || !pHCPhys)
3042 {
3043 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3044 pSession, ppvR0, ppvR3, pHCPhys));
3045 return VERR_INVALID_PARAMETER;
3046
3047 }
3048 if (cPages < 1 || cPages >= 256)
3049 {
3050 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3051 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3052 }
3053
3054 /*
3055 * Let IPRT do the job.
3056 */
3057 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3058 if (RT_SUCCESS(rc))
3059 {
3060 int rc2;
3061 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3062 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3063 if (RT_SUCCESS(rc))
3064 {
3065 Mem.eType = MEMREF_TYPE_CONT;
3066 rc = supdrvMemAdd(&Mem, pSession);
3067 if (!rc)
3068 {
3069 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3070 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3071 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3072 return 0;
3073 }
3074
3075 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3076 AssertRC(rc2);
3077 }
3078 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3079 AssertRC(rc2);
3080 }
3081
3082 return rc;
3083}
3084
3085
3086/**
3087 * Frees memory allocated using SUPR0ContAlloc().
3088 *
3089 * @returns IPRT status code.
3090 * @param pSession The session to which the memory was allocated.
3091 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3092 */
3093SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3094{
3095 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3096 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3097 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3098}
3099
3100
3101/**
3102 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3103 *
3104 * The memory isn't zeroed.
3105 *
3106 * @returns IPRT status code.
3107 * @param pSession Session data.
3108 * @param cPages Number of pages to allocate.
3109 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3110 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3111 * @param paPages Where to put the physical addresses of allocated memory.
3112 */
3113SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3114{
3115 unsigned iPage;
3116 int rc;
3117 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3118 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3119
3120 /*
3121 * Validate input.
3122 */
3123 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3124 if (!ppvR3 || !ppvR0 || !paPages)
3125 {
3126 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3127 pSession, ppvR3, ppvR0, paPages));
3128 return VERR_INVALID_PARAMETER;
3129
3130 }
3131 if (cPages < 1 || cPages >= 256)
3132 {
3133 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3134 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3135 }
3136
3137 /*
3138 * Let IPRT do the work.
3139 */
3140 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3141 if (RT_SUCCESS(rc))
3142 {
3143 int rc2;
3144 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3145 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3146 if (RT_SUCCESS(rc))
3147 {
3148 Mem.eType = MEMREF_TYPE_LOW;
3149 rc = supdrvMemAdd(&Mem, pSession);
3150 if (!rc)
3151 {
3152 for (iPage = 0; iPage < cPages; iPage++)
3153 {
3154 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3155 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3156 }
3157 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3158 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3159 return 0;
3160 }
3161
3162 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3163 AssertRC(rc2);
3164 }
3165
3166 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3167 AssertRC(rc2);
3168 }
3169
3170 return rc;
3171}
3172
3173
3174/**
3175 * Frees memory allocated using SUPR0LowAlloc().
3176 *
3177 * @returns IPRT status code.
3178 * @param pSession The session to which the memory was allocated.
3179 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3180 */
3181SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3182{
3183 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3184 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3185 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3186}
3187
3188
3189
3190/**
3191 * Allocates a chunk of memory with both R0 and R3 mappings.
3192 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3193 *
3194 * @returns IPRT status code.
3195 * @param pSession The session to associated the allocation with.
3196 * @param cb Number of bytes to allocate.
3197 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3198 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3199 */
3200SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3201{
3202 int rc;
3203 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3204 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3205
3206 /*
3207 * Validate input.
3208 */
3209 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3210 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3211 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3212 if (cb < 1 || cb >= _4M)
3213 {
3214 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3215 return VERR_INVALID_PARAMETER;
3216 }
3217
3218 /*
3219 * Let IPRT do the work.
3220 */
3221 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3222 if (RT_SUCCESS(rc))
3223 {
3224 int rc2;
3225 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3226 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3227 if (RT_SUCCESS(rc))
3228 {
3229 Mem.eType = MEMREF_TYPE_MEM;
3230 rc = supdrvMemAdd(&Mem, pSession);
3231 if (!rc)
3232 {
3233 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3234 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3235 return VINF_SUCCESS;
3236 }
3237
3238 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3239 AssertRC(rc2);
3240 }
3241
3242 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3243 AssertRC(rc2);
3244 }
3245
3246 return rc;
3247}
3248
3249
3250/**
3251 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3252 *
3253 * @returns IPRT status code.
3254 * @param pSession The session to which the memory was allocated.
3255 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3256 * @param paPages Where to store the physical addresses.
3257 */
3258SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3259{
3260 PSUPDRVBUNDLE pBundle;
3261 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3262
3263 /*
3264 * Validate input.
3265 */
3266 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3267 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3268 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3269
3270 /*
3271 * Search for the address.
3272 */
3273 RTSpinlockAcquire(pSession->Spinlock);
3274 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3275 {
3276 if (pBundle->cUsed > 0)
3277 {
3278 unsigned i;
3279 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3280 {
3281 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3282 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3283 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3284 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3285 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3286 )
3287 )
3288 {
3289 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3290 size_t iPage;
3291 for (iPage = 0; iPage < cPages; iPage++)
3292 {
3293 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3294 paPages[iPage].uReserved = 0;
3295 }
3296 RTSpinlockRelease(pSession->Spinlock);
3297 return VINF_SUCCESS;
3298 }
3299 }
3300 }
3301 }
3302 RTSpinlockRelease(pSession->Spinlock);
3303 Log(("Failed to find %p!!!\n", (void *)uPtr));
3304 return VERR_INVALID_PARAMETER;
3305}
3306
3307
3308/**
3309 * Free memory allocated by SUPR0MemAlloc().
3310 *
3311 * @returns IPRT status code.
3312 * @param pSession The session owning the allocation.
3313 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3314 */
3315SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3316{
3317 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3318 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3319 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3320}
3321
3322
3323/**
3324 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3325 *
3326 * The memory is fixed and it's possible to query the physical addresses using
3327 * SUPR0MemGetPhys().
3328 *
3329 * @returns IPRT status code.
3330 * @param pSession The session to associated the allocation with.
3331 * @param cPages The number of pages to allocate.
3332 * @param fFlags Flags, reserved for the future. Must be zero.
3333 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3334 * NULL if no ring-3 mapping.
3335 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3336 * NULL if no ring-0 mapping.
3337 * @param paPages Where to store the addresses of the pages. Optional.
3338 */
3339SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3340{
3341 int rc;
3342 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3343 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3344
3345 /*
3346 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3347 */
3348 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3349 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3350 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3351 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3352 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3353 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3354 {
3355 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3356 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3357 }
3358
3359 /*
3360 * Let IPRT do the work.
3361 */
3362 if (ppvR0)
3363 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3364 else
3365 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3366 if (RT_SUCCESS(rc))
3367 {
3368 int rc2;
3369 if (ppvR3)
3370 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3371 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3372 else
3373 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3374 if (RT_SUCCESS(rc))
3375 {
3376 Mem.eType = MEMREF_TYPE_PAGE;
3377 rc = supdrvMemAdd(&Mem, pSession);
3378 if (!rc)
3379 {
3380 if (ppvR3)
3381 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3382 if (ppvR0)
3383 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3384 if (paPages)
3385 {
3386 uint32_t iPage = cPages;
3387 while (iPage-- > 0)
3388 {
3389 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3390 Assert(paPages[iPage] != NIL_RTHCPHYS);
3391 }
3392 }
3393 return VINF_SUCCESS;
3394 }
3395
3396 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3397 AssertRC(rc2);
3398 }
3399
3400 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3401 AssertRC(rc2);
3402 }
3403 return rc;
3404}
3405
3406
3407/**
3408 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3409 * space.
3410 *
3411 * @returns IPRT status code.
3412 * @param pSession The session to associated the allocation with.
3413 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3414 * @param offSub Where to start mapping. Must be page aligned.
3415 * @param cbSub How much to map. Must be page aligned.
3416 * @param fFlags Flags, MBZ.
3417 * @param ppvR0 Where to return the address of the ring-0 mapping on
3418 * success.
3419 */
3420SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3421 uint32_t fFlags, PRTR0PTR ppvR0)
3422{
3423 int rc;
3424 PSUPDRVBUNDLE pBundle;
3425 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3426 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3427
3428 /*
3429 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3430 */
3431 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3432 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3433 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3434 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3435 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3436 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3437
3438 /*
3439 * Find the memory object.
3440 */
3441 RTSpinlockAcquire(pSession->Spinlock);
3442 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3443 {
3444 if (pBundle->cUsed > 0)
3445 {
3446 unsigned i;
3447 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3448 {
3449 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3450 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3451 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3452 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3453 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3454 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3455 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3456 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3457 {
3458 hMemObj = pBundle->aMem[i].MemObj;
3459 break;
3460 }
3461 }
3462 }
3463 }
3464 RTSpinlockRelease(pSession->Spinlock);
3465
3466 rc = VERR_INVALID_PARAMETER;
3467 if (hMemObj != NIL_RTR0MEMOBJ)
3468 {
3469 /*
3470 * Do some further input validations before calling IPRT.
3471 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3472 */
3473 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3474 if ( offSub < cbMemObj
3475 && cbSub <= cbMemObj
3476 && offSub + cbSub <= cbMemObj)
3477 {
3478 RTR0MEMOBJ hMapObj;
3479 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3480 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3481 if (RT_SUCCESS(rc))
3482 *ppvR0 = RTR0MemObjAddress(hMapObj);
3483 }
3484 else
3485 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3486
3487 }
3488 return rc;
3489}
3490
3491
3492/**
3493 * Changes the page level protection of one or more pages previously allocated
3494 * by SUPR0PageAllocEx.
3495 *
3496 * @returns IPRT status code.
3497 * @param pSession The session to associated the allocation with.
3498 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3499 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3500 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3501 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3502 * @param offSub Where to start changing. Must be page aligned.
3503 * @param cbSub How much to change. Must be page aligned.
3504 * @param fProt The new page level protection, see RTMEM_PROT_*.
3505 */
3506SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3507{
3508 int rc;
3509 PSUPDRVBUNDLE pBundle;
3510 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3511 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3512 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3513
3514 /*
3515 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3516 */
3517 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3518 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3519 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3520 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3521 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3522
3523 /*
3524 * Find the memory object.
3525 */
3526 RTSpinlockAcquire(pSession->Spinlock);
3527 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3528 {
3529 if (pBundle->cUsed > 0)
3530 {
3531 unsigned i;
3532 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3533 {
3534 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3535 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3536 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3537 || pvR3 == NIL_RTR3PTR)
3538 && ( pvR0 == NIL_RTR0PTR
3539 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3540 && ( pvR3 == NIL_RTR3PTR
3541 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3542 {
3543 if (pvR0 != NIL_RTR0PTR)
3544 hMemObjR0 = pBundle->aMem[i].MemObj;
3545 if (pvR3 != NIL_RTR3PTR)
3546 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3547 break;
3548 }
3549 }
3550 }
3551 }
3552 RTSpinlockRelease(pSession->Spinlock);
3553
3554 rc = VERR_INVALID_PARAMETER;
3555 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3556 || hMemObjR3 != NIL_RTR0MEMOBJ)
3557 {
3558 /*
3559 * Do some further input validations before calling IPRT.
3560 */
3561 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3562 if ( offSub < cbMemObj
3563 && cbSub <= cbMemObj
3564 && offSub + cbSub <= cbMemObj)
3565 {
3566 rc = VINF_SUCCESS;
3567 if (hMemObjR3 != NIL_RTR0PTR)
3568 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3569 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3570 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3571 }
3572 else
3573 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3574
3575 }
3576 return rc;
3577
3578}
3579
3580
3581/**
3582 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3583 *
3584 * @returns IPRT status code.
3585 * @param pSession The session owning the allocation.
3586 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3587 * SUPR0PageAllocEx().
3588 */
3589SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3590{
3591 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3592 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3593 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3594}
3595
3596
3597/**
3598 * Gets the paging mode of the current CPU.
3599 *
3600 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3601 */
3602SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3603{
3604 SUPPAGINGMODE enmMode;
3605
3606 RTR0UINTREG cr0 = ASMGetCR0();
3607 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3608 enmMode = SUPPAGINGMODE_INVALID;
3609 else
3610 {
3611 RTR0UINTREG cr4 = ASMGetCR4();
3612 uint32_t fNXEPlusLMA = 0;
3613 if (cr4 & X86_CR4_PAE)
3614 {
3615 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3616 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3617 {
3618 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3619 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3620 fNXEPlusLMA |= RT_BIT(0);
3621 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3622 fNXEPlusLMA |= RT_BIT(1);
3623 }
3624 }
3625
3626 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3627 {
3628 case 0:
3629 enmMode = SUPPAGINGMODE_32_BIT;
3630 break;
3631
3632 case X86_CR4_PGE:
3633 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3634 break;
3635
3636 case X86_CR4_PAE:
3637 enmMode = SUPPAGINGMODE_PAE;
3638 break;
3639
3640 case X86_CR4_PAE | RT_BIT(0):
3641 enmMode = SUPPAGINGMODE_PAE_NX;
3642 break;
3643
3644 case X86_CR4_PAE | X86_CR4_PGE:
3645 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3646 break;
3647
3648 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3649 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3650 break;
3651
3652 case RT_BIT(1) | X86_CR4_PAE:
3653 enmMode = SUPPAGINGMODE_AMD64;
3654 break;
3655
3656 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3657 enmMode = SUPPAGINGMODE_AMD64_NX;
3658 break;
3659
3660 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3661 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3662 break;
3663
3664 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3665 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3666 break;
3667
3668 default:
3669 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3670 enmMode = SUPPAGINGMODE_INVALID;
3671 break;
3672 }
3673 }
3674 return enmMode;
3675}
3676
3677
3678/**
3679 * Enables or disabled hardware virtualization extensions using native OS APIs.
3680 *
3681 * @returns VBox status code.
3682 * @retval VINF_SUCCESS on success.
3683 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3684 *
3685 * @param fEnable Whether to enable or disable.
3686 */
3687SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3688{
3689#ifdef RT_OS_DARWIN
3690 return supdrvOSEnableVTx(fEnable);
3691#else
3692 return VERR_NOT_SUPPORTED;
3693#endif
3694}
3695
3696
3697/**
3698 * Suspends hardware virtualization extensions using the native OS API.
3699 *
3700 * This is called prior to entering raw-mode context.
3701 *
3702 * @returns @c true if suspended, @c false if not.
3703 */
3704SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3705{
3706#ifdef RT_OS_DARWIN
3707 return supdrvOSSuspendVTxOnCpu();
3708#else
3709 return false;
3710#endif
3711}
3712
3713
3714/**
3715 * Resumes hardware virtualization extensions using the native OS API.
3716 *
3717 * This is called after to entering raw-mode context.
3718 *
3719 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3720 */
3721SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3722{
3723#ifdef RT_OS_DARWIN
3724 supdrvOSResumeVTxOnCpu(fSuspended);
3725#else
3726 Assert(!fSuspended);
3727#endif
3728}
3729
3730
3731/**
3732 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3733 *
3734 * @returns VBox status code.
3735 * @retval VERR_VMX_NO_VMX
3736 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3737 * @retval VERR_VMX_MSR_VMXON_DISABLED
3738 * @retval VERR_VMX_MSR_LOCKING_FAILED
3739 * @retval VERR_SVM_NO_SVM
3740 * @retval VERR_SVM_DISABLED
3741 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3742 * (centaur) CPU.
3743 *
3744 * @param pSession The session handle.
3745 * @param pfCaps Where to store the capabilities.
3746 */
3747SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3748{
3749 int rc = VERR_UNSUPPORTED_CPU;
3750 bool fIsSmxModeAmbiguous = false;
3751 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3752
3753 /*
3754 * Input validation.
3755 */
3756 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3757 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3758
3759 *pfCaps = 0;
3760 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3761 RTThreadPreemptDisable(&PreemptState);
3762 if (ASMHasCpuId())
3763 {
3764 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3765 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3766
3767 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3768 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3769
3770 if ( ASMIsValidStdRange(uMaxId)
3771 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3772 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3773 )
3774 {
3775 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3776 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3777 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3778 )
3779 {
3780 /** @todo Unify code with hmR0InitIntelCpu(). */
3781 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3782 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3783 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3784 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3785 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3786
3787 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3788 if (fMsrLocked)
3789 {
3790 if (fVmxAllowed && fSmxVmxAllowed)
3791 rc = VINF_SUCCESS;
3792 else if (!fVmxAllowed && !fSmxVmxAllowed)
3793 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3794 else if (!fMaybeSmxMode)
3795 {
3796 if (fVmxAllowed)
3797 rc = VINF_SUCCESS;
3798 else
3799 rc = VERR_VMX_MSR_VMXON_DISABLED;
3800 }
3801 else
3802 {
3803 /*
3804 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3805 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3806 * See @bugref{6873}.
3807 */
3808 Assert(fMaybeSmxMode == true);
3809 fIsSmxModeAmbiguous = true;
3810 rc = VINF_SUCCESS;
3811 }
3812 }
3813 else
3814 {
3815 /*
3816 * MSR is not yet locked; we can change it ourselves here.
3817 * Once the lock bit is set, this MSR can no longer be modified.
3818 *
3819 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3820 * accurately. See @bugref{6873}.
3821 */
3822 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3823 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3824 | MSR_IA32_FEATURE_CONTROL_VMXON;
3825 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3826
3827 /* Verify. */
3828 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3829 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3830 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3831 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3832 if (fSmxVmxAllowed && fVmxAllowed)
3833 rc = VINF_SUCCESS;
3834 else
3835 rc = VERR_VMX_MSR_LOCKING_FAILED;
3836 }
3837
3838 if (rc == VINF_SUCCESS)
3839 {
3840 VMXCAPABILITY vtCaps;
3841
3842 *pfCaps |= SUPVTCAPS_VT_X;
3843
3844 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3845 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3846 {
3847 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3848 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3849 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3850 }
3851 }
3852 }
3853 else
3854 rc = VERR_VMX_NO_VMX;
3855 }
3856 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3857 && ASMIsValidStdRange(uMaxId))
3858 {
3859 uint32_t fExtFeaturesEcx, uExtMaxId;
3860 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3861 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3862 if ( ASMIsValidExtRange(uExtMaxId)
3863 && uExtMaxId >= 0x8000000a
3864 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3865 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3866 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3867 )
3868 {
3869 /* Check if SVM is disabled */
3870 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3871 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3872 {
3873 uint32_t fSvmFeatures;
3874 *pfCaps |= SUPVTCAPS_AMD_V;
3875
3876 /* Query AMD-V features. */
3877 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3878 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3879 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3880
3881 rc = VINF_SUCCESS;
3882 }
3883 else
3884 rc = VERR_SVM_DISABLED;
3885 }
3886 else
3887 rc = VERR_SVM_NO_SVM;
3888 }
3889 }
3890
3891 RTThreadPreemptRestore(&PreemptState);
3892 if (fIsSmxModeAmbiguous)
3893 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3894 return rc;
3895}
3896
3897
3898/**
3899 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3900 * updating.
3901 *
3902 * @param pGip Pointer to the GIP.
3903 * @param pGipCpu The per CPU structure for this CPU.
3904 * @param u64NanoTS The current time.
3905 */
3906static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3907{
3908 pGipCpu->u64TSC = SUPReadTsc() - pGipCpu->u32UpdateIntervalTSC;
3909 pGipCpu->u64NanoTS = u64NanoTS;
3910}
3911
3912
3913/**
3914 * Set the current TSC and NanoTS value for the CPU.
3915 *
3916 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3917 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3918 * @param pvUser2 Pointer to the variable holding the current time.
3919 */
3920static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3921{
3922 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3923 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3924
3925 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3926 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3927
3928 NOREF(pvUser2);
3929 NOREF(idCpu);
3930}
3931
3932
3933/**
3934 * Increase the timer freqency on hosts where this is possible (NT).
3935 *
3936 * The idea is that more interrupts is better for us... Also, it's better than
3937 * we increase the timer frequence, because we might end up getting inaccurate
3938 * callbacks if someone else does it.
3939 *
3940 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
3941 */
3942static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3943{
3944 if (pDevExt->u32SystemTimerGranularityGrant == 0)
3945 {
3946 uint32_t u32SystemResolution;
3947 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3948 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3949 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3950 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3951 )
3952 {
3953 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3954 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3955 }
3956 }
3957}
3958
3959
3960/**
3961 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
3962 *
3963 * @param pDevExt Clears u32SystemTimerGranularityGrant.
3964 */
3965static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
3966{
3967 if (pDevExt->u32SystemTimerGranularityGrant)
3968 {
3969 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
3970 AssertRC(rc2);
3971 pDevExt->u32SystemTimerGranularityGrant = 0;
3972 }
3973}
3974
3975
3976/**
3977 * Maps the GIP into userspace and/or get the physical address of the GIP.
3978 *
3979 * @returns IPRT status code.
3980 * @param pSession Session to which the GIP mapping should belong.
3981 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3982 * @param pHCPhysGip Where to store the physical address. (optional)
3983 *
3984 * @remark There is no reference counting on the mapping, so one call to this function
3985 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3986 * and remove the session as a GIP user.
3987 */
3988SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3989{
3990 int rc;
3991 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3992 RTR3PTR pGipR3 = NIL_RTR3PTR;
3993 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3994 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3995
3996 /*
3997 * Validate
3998 */
3999 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4000 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
4001 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
4002
4003#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4004 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4005#else
4006 RTSemFastMutexRequest(pDevExt->mtxGip);
4007#endif
4008 if (pDevExt->pGip)
4009 {
4010 /*
4011 * Map it?
4012 */
4013 rc = VINF_SUCCESS;
4014 if (ppGipR3)
4015 {
4016 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4017 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4018 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4019 if (RT_SUCCESS(rc))
4020 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4021 }
4022
4023 /*
4024 * Get physical address.
4025 */
4026 if (pHCPhysGip && RT_SUCCESS(rc))
4027 HCPhys = pDevExt->HCPhysGip;
4028
4029 /*
4030 * Reference globally.
4031 */
4032 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4033 {
4034 pSession->fGipReferenced = 1;
4035 pDevExt->cGipUsers++;
4036 if (pDevExt->cGipUsers == 1)
4037 {
4038 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4039 uint64_t u64NanoTS;
4040
4041 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4042
4043 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
4044
4045 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4046 {
4047 unsigned i;
4048 for (i = 0; i < pGipR0->cCpus; i++)
4049 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4050 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4051 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4052 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4053 }
4054
4055 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4056 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4057 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4058 || RTMpGetOnlineCount() == 1)
4059 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4060 else
4061 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4062
4063#ifndef DO_NOT_START_GIP
4064 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4065#endif
4066 rc = VINF_SUCCESS;
4067 }
4068 }
4069 }
4070 else
4071 {
4072 rc = VERR_GENERAL_FAILURE;
4073 Log(("SUPR0GipMap: GIP is not available!\n"));
4074 }
4075#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4076 RTSemMutexRelease(pDevExt->mtxGip);
4077#else
4078 RTSemFastMutexRelease(pDevExt->mtxGip);
4079#endif
4080
4081 /*
4082 * Write returns.
4083 */
4084 if (pHCPhysGip)
4085 *pHCPhysGip = HCPhys;
4086 if (ppGipR3)
4087 *ppGipR3 = pGipR3;
4088
4089#ifdef DEBUG_DARWIN_GIP
4090 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4091#else
4092 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4093#endif
4094 return rc;
4095}
4096
4097
4098/**
4099 * Unmaps any user mapping of the GIP and terminates all GIP access
4100 * from this session.
4101 *
4102 * @returns IPRT status code.
4103 * @param pSession Session to which the GIP mapping should belong.
4104 */
4105SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4106{
4107 int rc = VINF_SUCCESS;
4108 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4109#ifdef DEBUG_DARWIN_GIP
4110 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4111 pSession,
4112 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4113 pSession->GipMapObjR3));
4114#else
4115 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4116#endif
4117 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4118
4119#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4120 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4121#else
4122 RTSemFastMutexRequest(pDevExt->mtxGip);
4123#endif
4124
4125 /*
4126 * Unmap anything?
4127 */
4128 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4129 {
4130 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4131 AssertRC(rc);
4132 if (RT_SUCCESS(rc))
4133 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4134 }
4135
4136 /*
4137 * Dereference global GIP.
4138 */
4139 if (pSession->fGipReferenced && !rc)
4140 {
4141 pSession->fGipReferenced = 0;
4142 if ( pDevExt->cGipUsers > 0
4143 && !--pDevExt->cGipUsers)
4144 {
4145 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4146#ifndef DO_NOT_START_GIP
4147 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4148#endif
4149 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
4150 }
4151 }
4152
4153#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4154 RTSemMutexRelease(pDevExt->mtxGip);
4155#else
4156 RTSemFastMutexRelease(pDevExt->mtxGip);
4157#endif
4158
4159 return rc;
4160}
4161
4162
4163/**
4164 * Gets the GIP pointer.
4165 *
4166 * @returns Pointer to the GIP or NULL.
4167 */
4168SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4169{
4170 return g_pSUPGlobalInfoPage;
4171}
4172
4173
4174/**
4175 * Register a component factory with the support driver.
4176 *
4177 * This is currently restricted to kernel sessions only.
4178 *
4179 * @returns VBox status code.
4180 * @retval VINF_SUCCESS on success.
4181 * @retval VERR_NO_MEMORY if we're out of memory.
4182 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4183 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4184 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4185 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4186 *
4187 * @param pSession The SUPDRV session (must be a ring-0 session).
4188 * @param pFactory Pointer to the component factory registration structure.
4189 *
4190 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4191 */
4192SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4193{
4194 PSUPDRVFACTORYREG pNewReg;
4195 const char *psz;
4196 int rc;
4197
4198 /*
4199 * Validate parameters.
4200 */
4201 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4202 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4203 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4204 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4205 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4206 AssertReturn(psz, VERR_INVALID_PARAMETER);
4207
4208 /*
4209 * Allocate and initialize a new registration structure.
4210 */
4211 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4212 if (pNewReg)
4213 {
4214 pNewReg->pNext = NULL;
4215 pNewReg->pFactory = pFactory;
4216 pNewReg->pSession = pSession;
4217 pNewReg->cchName = psz - &pFactory->szName[0];
4218
4219 /*
4220 * Add it to the tail of the list after checking for prior registration.
4221 */
4222 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4223 if (RT_SUCCESS(rc))
4224 {
4225 PSUPDRVFACTORYREG pPrev = NULL;
4226 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4227 while (pCur && pCur->pFactory != pFactory)
4228 {
4229 pPrev = pCur;
4230 pCur = pCur->pNext;
4231 }
4232 if (!pCur)
4233 {
4234 if (pPrev)
4235 pPrev->pNext = pNewReg;
4236 else
4237 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4238 rc = VINF_SUCCESS;
4239 }
4240 else
4241 rc = VERR_ALREADY_EXISTS;
4242
4243 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4244 }
4245
4246 if (RT_FAILURE(rc))
4247 RTMemFree(pNewReg);
4248 }
4249 else
4250 rc = VERR_NO_MEMORY;
4251 return rc;
4252}
4253
4254
4255/**
4256 * Deregister a component factory.
4257 *
4258 * @returns VBox status code.
4259 * @retval VINF_SUCCESS on success.
4260 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4261 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4262 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4263 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4264 *
4265 * @param pSession The SUPDRV session (must be a ring-0 session).
4266 * @param pFactory Pointer to the component factory registration structure
4267 * previously passed SUPR0ComponentRegisterFactory().
4268 *
4269 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4270 */
4271SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4272{
4273 int rc;
4274
4275 /*
4276 * Validate parameters.
4277 */
4278 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4279 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4280 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4281
4282 /*
4283 * Take the lock and look for the registration record.
4284 */
4285 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4286 if (RT_SUCCESS(rc))
4287 {
4288 PSUPDRVFACTORYREG pPrev = NULL;
4289 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4290 while (pCur && pCur->pFactory != pFactory)
4291 {
4292 pPrev = pCur;
4293 pCur = pCur->pNext;
4294 }
4295 if (pCur)
4296 {
4297 if (!pPrev)
4298 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4299 else
4300 pPrev->pNext = pCur->pNext;
4301
4302 pCur->pNext = NULL;
4303 pCur->pFactory = NULL;
4304 pCur->pSession = NULL;
4305 rc = VINF_SUCCESS;
4306 }
4307 else
4308 rc = VERR_NOT_FOUND;
4309
4310 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4311
4312 RTMemFree(pCur);
4313 }
4314 return rc;
4315}
4316
4317
4318/**
4319 * Queries a component factory.
4320 *
4321 * @returns VBox status code.
4322 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4323 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4324 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4325 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4326 *
4327 * @param pSession The SUPDRV session.
4328 * @param pszName The name of the component factory.
4329 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4330 * @param ppvFactoryIf Where to store the factory interface.
4331 */
4332SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4333{
4334 const char *pszEnd;
4335 size_t cchName;
4336 int rc;
4337
4338 /*
4339 * Validate parameters.
4340 */
4341 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4342
4343 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4344 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4345 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4346 cchName = pszEnd - pszName;
4347
4348 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4349 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4350 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4351
4352 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4353 *ppvFactoryIf = NULL;
4354
4355 /*
4356 * Take the lock and try all factories by this name.
4357 */
4358 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4359 if (RT_SUCCESS(rc))
4360 {
4361 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4362 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4363 while (pCur)
4364 {
4365 if ( pCur->cchName == cchName
4366 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4367 {
4368 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4369 if (pvFactory)
4370 {
4371 *ppvFactoryIf = pvFactory;
4372 rc = VINF_SUCCESS;
4373 break;
4374 }
4375 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4376 }
4377
4378 /* next */
4379 pCur = pCur->pNext;
4380 }
4381
4382 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4383 }
4384 return rc;
4385}
4386
4387
4388/**
4389 * Adds a memory object to the session.
4390 *
4391 * @returns IPRT status code.
4392 * @param pMem Memory tracking structure containing the
4393 * information to track.
4394 * @param pSession The session.
4395 */
4396static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4397{
4398 PSUPDRVBUNDLE pBundle;
4399
4400 /*
4401 * Find free entry and record the allocation.
4402 */
4403 RTSpinlockAcquire(pSession->Spinlock);
4404 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4405 {
4406 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4407 {
4408 unsigned i;
4409 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4410 {
4411 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4412 {
4413 pBundle->cUsed++;
4414 pBundle->aMem[i] = *pMem;
4415 RTSpinlockRelease(pSession->Spinlock);
4416 return VINF_SUCCESS;
4417 }
4418 }
4419 AssertFailed(); /* !!this can't be happening!!! */
4420 }
4421 }
4422 RTSpinlockRelease(pSession->Spinlock);
4423
4424 /*
4425 * Need to allocate a new bundle.
4426 * Insert into the last entry in the bundle.
4427 */
4428 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4429 if (!pBundle)
4430 return VERR_NO_MEMORY;
4431
4432 /* take last entry. */
4433 pBundle->cUsed++;
4434 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4435
4436 /* insert into list. */
4437 RTSpinlockAcquire(pSession->Spinlock);
4438 pBundle->pNext = pSession->Bundle.pNext;
4439 pSession->Bundle.pNext = pBundle;
4440 RTSpinlockRelease(pSession->Spinlock);
4441
4442 return VINF_SUCCESS;
4443}
4444
4445
4446/**
4447 * Releases a memory object referenced by pointer and type.
4448 *
4449 * @returns IPRT status code.
4450 * @param pSession Session data.
4451 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4452 * @param eType Memory type.
4453 */
4454static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4455{
4456 PSUPDRVBUNDLE pBundle;
4457
4458 /*
4459 * Validate input.
4460 */
4461 if (!uPtr)
4462 {
4463 Log(("Illegal address %p\n", (void *)uPtr));
4464 return VERR_INVALID_PARAMETER;
4465 }
4466
4467 /*
4468 * Search for the address.
4469 */
4470 RTSpinlockAcquire(pSession->Spinlock);
4471 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4472 {
4473 if (pBundle->cUsed > 0)
4474 {
4475 unsigned i;
4476 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4477 {
4478 if ( pBundle->aMem[i].eType == eType
4479 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4480 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4481 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4482 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4483 )
4484 {
4485 /* Make a copy of it and release it outside the spinlock. */
4486 SUPDRVMEMREF Mem = pBundle->aMem[i];
4487 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4488 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4489 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4490 RTSpinlockRelease(pSession->Spinlock);
4491
4492 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4493 {
4494 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4495 AssertRC(rc); /** @todo figure out how to handle this. */
4496 }
4497 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4498 {
4499 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4500 AssertRC(rc); /** @todo figure out how to handle this. */
4501 }
4502 return VINF_SUCCESS;
4503 }
4504 }
4505 }
4506 }
4507 RTSpinlockRelease(pSession->Spinlock);
4508 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4509 return VERR_INVALID_PARAMETER;
4510}
4511
4512
4513/**
4514 * Opens an image. If it's the first time it's opened the call must upload
4515 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4516 *
4517 * This is the 1st step of the loading.
4518 *
4519 * @returns IPRT status code.
4520 * @param pDevExt Device globals.
4521 * @param pSession Session data.
4522 * @param pReq The open request.
4523 */
4524static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4525{
4526 int rc;
4527 PSUPDRVLDRIMAGE pImage;
4528 void *pv;
4529 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4530 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4531
4532 /*
4533 * Check if we got an instance of the image already.
4534 */
4535 supdrvLdrLock(pDevExt);
4536 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4537 {
4538 if ( pImage->szName[cchName] == '\0'
4539 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4540 {
4541 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4542 {
4543 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4544 pImage->cUsage++;
4545 pReq->u.Out.pvImageBase = pImage->pvImage;
4546 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4547 pReq->u.Out.fNativeLoader = pImage->fNative;
4548 supdrvLdrAddUsage(pSession, pImage);
4549 supdrvLdrUnlock(pDevExt);
4550 return VINF_SUCCESS;
4551 }
4552 supdrvLdrUnlock(pDevExt);
4553 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4554 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4555 }
4556 }
4557 /* (not found - add it!) */
4558
4559 /*
4560 * Allocate memory.
4561 */
4562 Assert(cchName < sizeof(pImage->szName));
4563 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4564 if (!pv)
4565 {
4566 supdrvLdrUnlock(pDevExt);
4567 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4568 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4569 }
4570
4571 /*
4572 * Setup and link in the LDR stuff.
4573 */
4574 pImage = (PSUPDRVLDRIMAGE)pv;
4575 pImage->pvImage = NULL;
4576 pImage->pvImageAlloc = NULL;
4577 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4578 pImage->cbImageBits = pReq->u.In.cbImageBits;
4579 pImage->cSymbols = 0;
4580 pImage->paSymbols = NULL;
4581 pImage->pachStrTab = NULL;
4582 pImage->cbStrTab = 0;
4583 pImage->pfnModuleInit = NULL;
4584 pImage->pfnModuleTerm = NULL;
4585 pImage->pfnServiceReqHandler = NULL;
4586 pImage->uState = SUP_IOCTL_LDR_OPEN;
4587 pImage->cUsage = 1;
4588 pImage->pDevExt = pDevExt;
4589 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4590
4591 /*
4592 * Try load it using the native loader, if that isn't supported, fall back
4593 * on the older method.
4594 */
4595 pImage->fNative = true;
4596 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4597 if (rc == VERR_NOT_SUPPORTED)
4598 {
4599 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4600 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4601 pImage->fNative = false;
4602 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4603 }
4604 if (RT_FAILURE(rc))
4605 {
4606 supdrvLdrUnlock(pDevExt);
4607 RTMemFree(pImage);
4608 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4609 return rc;
4610 }
4611 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4612
4613 /*
4614 * Link it.
4615 */
4616 pImage->pNext = pDevExt->pLdrImages;
4617 pDevExt->pLdrImages = pImage;
4618
4619 supdrvLdrAddUsage(pSession, pImage);
4620
4621 pReq->u.Out.pvImageBase = pImage->pvImage;
4622 pReq->u.Out.fNeedsLoading = true;
4623 pReq->u.Out.fNativeLoader = pImage->fNative;
4624 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4625
4626 supdrvLdrUnlock(pDevExt);
4627 return VINF_SUCCESS;
4628}
4629
4630
4631/**
4632 * Worker that validates a pointer to an image entrypoint.
4633 *
4634 * @returns IPRT status code.
4635 * @param pDevExt The device globals.
4636 * @param pImage The loader image.
4637 * @param pv The pointer into the image.
4638 * @param fMayBeNull Whether it may be NULL.
4639 * @param pszWhat What is this entrypoint? (for logging)
4640 * @param pbImageBits The image bits prepared by ring-3.
4641 *
4642 * @remarks Will leave the lock on failure.
4643 */
4644static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4645 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4646{
4647 if (!fMayBeNull || pv)
4648 {
4649 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4650 {
4651 supdrvLdrUnlock(pDevExt);
4652 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4653 return VERR_INVALID_PARAMETER;
4654 }
4655
4656 if (pImage->fNative)
4657 {
4658 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4659 if (RT_FAILURE(rc))
4660 {
4661 supdrvLdrUnlock(pDevExt);
4662 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4663 return rc;
4664 }
4665 }
4666 }
4667 return VINF_SUCCESS;
4668}
4669
4670
4671/**
4672 * Loads the image bits.
4673 *
4674 * This is the 2nd step of the loading.
4675 *
4676 * @returns IPRT status code.
4677 * @param pDevExt Device globals.
4678 * @param pSession Session data.
4679 * @param pReq The request.
4680 */
4681static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4682{
4683 PSUPDRVLDRUSAGE pUsage;
4684 PSUPDRVLDRIMAGE pImage;
4685 int rc;
4686 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4687
4688 /*
4689 * Find the ldr image.
4690 */
4691 supdrvLdrLock(pDevExt);
4692 pUsage = pSession->pLdrUsage;
4693 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4694 pUsage = pUsage->pNext;
4695 if (!pUsage)
4696 {
4697 supdrvLdrUnlock(pDevExt);
4698 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4699 return VERR_INVALID_HANDLE;
4700 }
4701 pImage = pUsage->pImage;
4702
4703 /*
4704 * Validate input.
4705 */
4706 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4707 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4708 {
4709 supdrvLdrUnlock(pDevExt);
4710 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4711 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4712 return VERR_INVALID_HANDLE;
4713 }
4714
4715 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4716 {
4717 unsigned uState = pImage->uState;
4718 supdrvLdrUnlock(pDevExt);
4719 if (uState != SUP_IOCTL_LDR_LOAD)
4720 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4721 return VERR_ALREADY_LOADED;
4722 }
4723
4724 switch (pReq->u.In.eEPType)
4725 {
4726 case SUPLDRLOADEP_NOTHING:
4727 break;
4728
4729 case SUPLDRLOADEP_VMMR0:
4730 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4731 if (RT_SUCCESS(rc))
4732 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4733 if (RT_SUCCESS(rc))
4734 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4735 if (RT_SUCCESS(rc))
4736 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4737 if (RT_FAILURE(rc))
4738 return rc;
4739 break;
4740
4741 case SUPLDRLOADEP_SERVICE:
4742 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4743 if (RT_FAILURE(rc))
4744 return rc;
4745 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4746 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4747 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4748 {
4749 supdrvLdrUnlock(pDevExt);
4750 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4751 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4752 pReq->u.In.EP.Service.apvReserved[0],
4753 pReq->u.In.EP.Service.apvReserved[1],
4754 pReq->u.In.EP.Service.apvReserved[2]));
4755 return VERR_INVALID_PARAMETER;
4756 }
4757 break;
4758
4759 default:
4760 supdrvLdrUnlock(pDevExt);
4761 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4762 return VERR_INVALID_PARAMETER;
4763 }
4764
4765 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4766 if (RT_FAILURE(rc))
4767 return rc;
4768 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4769 if (RT_FAILURE(rc))
4770 return rc;
4771
4772 /*
4773 * Allocate and copy the tables.
4774 * (No need to do try/except as this is a buffered request.)
4775 */
4776 pImage->cbStrTab = pReq->u.In.cbStrTab;
4777 if (pImage->cbStrTab)
4778 {
4779 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4780 if (pImage->pachStrTab)
4781 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4782 else
4783 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4784 }
4785
4786 pImage->cSymbols = pReq->u.In.cSymbols;
4787 if (RT_SUCCESS(rc) && pImage->cSymbols)
4788 {
4789 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4790 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4791 if (pImage->paSymbols)
4792 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4793 else
4794 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4795 }
4796
4797 /*
4798 * Copy the bits / complete native loading.
4799 */
4800 if (RT_SUCCESS(rc))
4801 {
4802 pImage->uState = SUP_IOCTL_LDR_LOAD;
4803 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4804 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4805
4806 if (pImage->fNative)
4807 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4808 else
4809 {
4810 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4811 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4812 }
4813 }
4814
4815 /*
4816 * Update any entry points.
4817 */
4818 if (RT_SUCCESS(rc))
4819 {
4820 switch (pReq->u.In.eEPType)
4821 {
4822 default:
4823 case SUPLDRLOADEP_NOTHING:
4824 rc = VINF_SUCCESS;
4825 break;
4826 case SUPLDRLOADEP_VMMR0:
4827 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4828 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4829 break;
4830 case SUPLDRLOADEP_SERVICE:
4831 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4832 rc = VINF_SUCCESS;
4833 break;
4834 }
4835 }
4836
4837 /*
4838 * On success call the module initialization.
4839 */
4840 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4841 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4842 {
4843 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4844 pDevExt->pLdrInitImage = pImage;
4845 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4846 rc = pImage->pfnModuleInit(pImage);
4847 pDevExt->pLdrInitImage = NULL;
4848 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4849 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4850 supdrvLdrUnsetVMMR0EPs(pDevExt);
4851 }
4852 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4853
4854 if (RT_FAILURE(rc))
4855 {
4856 /* Inform the tracing component in case ModuleInit registered TPs. */
4857 supdrvTracerModuleUnloading(pDevExt, pImage);
4858
4859 pImage->uState = SUP_IOCTL_LDR_OPEN;
4860 pImage->pfnModuleInit = NULL;
4861 pImage->pfnModuleTerm = NULL;
4862 pImage->pfnServiceReqHandler= NULL;
4863 pImage->cbStrTab = 0;
4864 RTMemFree(pImage->pachStrTab);
4865 pImage->pachStrTab = NULL;
4866 RTMemFree(pImage->paSymbols);
4867 pImage->paSymbols = NULL;
4868 pImage->cSymbols = 0;
4869 }
4870
4871 supdrvLdrUnlock(pDevExt);
4872 return rc;
4873}
4874
4875
4876/**
4877 * Frees a previously loaded (prep'ed) image.
4878 *
4879 * @returns IPRT status code.
4880 * @param pDevExt Device globals.
4881 * @param pSession Session data.
4882 * @param pReq The request.
4883 */
4884static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4885{
4886 int rc;
4887 PSUPDRVLDRUSAGE pUsagePrev;
4888 PSUPDRVLDRUSAGE pUsage;
4889 PSUPDRVLDRIMAGE pImage;
4890 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4891
4892 /*
4893 * Find the ldr image.
4894 */
4895 supdrvLdrLock(pDevExt);
4896 pUsagePrev = NULL;
4897 pUsage = pSession->pLdrUsage;
4898 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4899 {
4900 pUsagePrev = pUsage;
4901 pUsage = pUsage->pNext;
4902 }
4903 if (!pUsage)
4904 {
4905 supdrvLdrUnlock(pDevExt);
4906 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4907 return VERR_INVALID_HANDLE;
4908 }
4909
4910 /*
4911 * Check if we can remove anything.
4912 */
4913 rc = VINF_SUCCESS;
4914 pImage = pUsage->pImage;
4915 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4916 {
4917 /*
4918 * Check if there are any objects with destructors in the image, if
4919 * so leave it for the session cleanup routine so we get a chance to
4920 * clean things up in the right order and not leave them all dangling.
4921 */
4922 RTSpinlockAcquire(pDevExt->Spinlock);
4923 if (pImage->cUsage <= 1)
4924 {
4925 PSUPDRVOBJ pObj;
4926 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4927 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4928 {
4929 rc = VERR_DANGLING_OBJECTS;
4930 break;
4931 }
4932 }
4933 else
4934 {
4935 PSUPDRVUSAGE pGenUsage;
4936 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4937 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4938 {
4939 rc = VERR_DANGLING_OBJECTS;
4940 break;
4941 }
4942 }
4943 RTSpinlockRelease(pDevExt->Spinlock);
4944 if (rc == VINF_SUCCESS)
4945 {
4946 /* unlink it */
4947 if (pUsagePrev)
4948 pUsagePrev->pNext = pUsage->pNext;
4949 else
4950 pSession->pLdrUsage = pUsage->pNext;
4951
4952 /* free it */
4953 pUsage->pImage = NULL;
4954 pUsage->pNext = NULL;
4955 RTMemFree(pUsage);
4956
4957 /*
4958 * Dereference the image.
4959 */
4960 if (pImage->cUsage <= 1)
4961 supdrvLdrFree(pDevExt, pImage);
4962 else
4963 pImage->cUsage--;
4964 }
4965 else
4966 {
4967 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4968 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4969 }
4970 }
4971 else
4972 {
4973 /*
4974 * Dereference both image and usage.
4975 */
4976 pImage->cUsage--;
4977 pUsage->cUsage--;
4978 }
4979
4980 supdrvLdrUnlock(pDevExt);
4981 return rc;
4982}
4983
4984
4985/**
4986 * Gets the address of a symbol in an open image.
4987 *
4988 * @returns IPRT status code.
4989 * @param pDevExt Device globals.
4990 * @param pSession Session data.
4991 * @param pReq The request buffer.
4992 */
4993static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
4994{
4995 PSUPDRVLDRIMAGE pImage;
4996 PSUPDRVLDRUSAGE pUsage;
4997 uint32_t i;
4998 PSUPLDRSYM paSyms;
4999 const char *pchStrings;
5000 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5001 void *pvSymbol = NULL;
5002 int rc = VERR_GENERAL_FAILURE;
5003 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5004
5005 /*
5006 * Find the ldr image.
5007 */
5008 supdrvLdrLock(pDevExt);
5009 pUsage = pSession->pLdrUsage;
5010 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5011 pUsage = pUsage->pNext;
5012 if (!pUsage)
5013 {
5014 supdrvLdrUnlock(pDevExt);
5015 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5016 return VERR_INVALID_HANDLE;
5017 }
5018 pImage = pUsage->pImage;
5019 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5020 {
5021 unsigned uState = pImage->uState;
5022 supdrvLdrUnlock(pDevExt);
5023 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5024 return VERR_ALREADY_LOADED;
5025 }
5026
5027 /*
5028 * Search the symbol strings.
5029 *
5030 * Note! The int32_t is for native loading on solaris where the data
5031 * and text segments are in very different places.
5032 */
5033 pchStrings = pImage->pachStrTab;
5034 paSyms = pImage->paSymbols;
5035 for (i = 0; i < pImage->cSymbols; i++)
5036 {
5037 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5038 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5039 {
5040 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5041 rc = VINF_SUCCESS;
5042 break;
5043 }
5044 }
5045 supdrvLdrUnlock(pDevExt);
5046 pReq->u.Out.pvSymbol = pvSymbol;
5047 return rc;
5048}
5049
5050
5051/**
5052 * Gets the address of a symbol in an open image or the support driver.
5053 *
5054 * @returns VINF_SUCCESS on success.
5055 * @returns
5056 * @param pDevExt Device globals.
5057 * @param pSession Session data.
5058 * @param pReq The request buffer.
5059 */
5060static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5061{
5062 int rc = VINF_SUCCESS;
5063 const char *pszSymbol = pReq->u.In.pszSymbol;
5064 const char *pszModule = pReq->u.In.pszModule;
5065 size_t cbSymbol;
5066 char const *pszEnd;
5067 uint32_t i;
5068
5069 /*
5070 * Input validation.
5071 */
5072 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5073 pszEnd = RTStrEnd(pszSymbol, 512);
5074 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5075 cbSymbol = pszEnd - pszSymbol + 1;
5076
5077 if (pszModule)
5078 {
5079 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5080 pszEnd = RTStrEnd(pszModule, 64);
5081 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5082 }
5083 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5084
5085
5086 if ( !pszModule
5087 || !strcmp(pszModule, "SupDrv"))
5088 {
5089 /*
5090 * Search the support driver export table.
5091 */
5092 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5093 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5094 {
5095 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5096 break;
5097 }
5098 }
5099 else
5100 {
5101 /*
5102 * Find the loader image.
5103 */
5104 PSUPDRVLDRIMAGE pImage;
5105
5106 supdrvLdrLock(pDevExt);
5107
5108 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5109 if (!strcmp(pImage->szName, pszModule))
5110 break;
5111 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5112 {
5113 /*
5114 * Search the symbol strings.
5115 */
5116 const char *pchStrings = pImage->pachStrTab;
5117 PCSUPLDRSYM paSyms = pImage->paSymbols;
5118 for (i = 0; i < pImage->cSymbols; i++)
5119 {
5120 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5121 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5122 {
5123 /*
5124 * Found it! Calc the symbol address and add a reference to the module.
5125 */
5126 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5127 rc = supdrvLdrAddUsage(pSession, pImage);
5128 break;
5129 }
5130 }
5131 }
5132 else
5133 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5134
5135 supdrvLdrUnlock(pDevExt);
5136 }
5137 return rc;
5138}
5139
5140
5141/**
5142 * Updates the VMMR0 entry point pointers.
5143 *
5144 * @returns IPRT status code.
5145 * @param pDevExt Device globals.
5146 * @param pSession Session data.
5147 * @param pVMMR0 VMMR0 image handle.
5148 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5149 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5150 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5151 * @remark Caller must own the loader mutex.
5152 */
5153static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5154{
5155 int rc = VINF_SUCCESS;
5156 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5157
5158
5159 /*
5160 * Check if not yet set.
5161 */
5162 if (!pDevExt->pvVMMR0)
5163 {
5164 pDevExt->pvVMMR0 = pvVMMR0;
5165 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5166 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5167 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5168 }
5169 else
5170 {
5171 /*
5172 * Return failure or success depending on whether the values match or not.
5173 */
5174 if ( pDevExt->pvVMMR0 != pvVMMR0
5175 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5176 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5177 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5178 {
5179 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5180 rc = VERR_INVALID_PARAMETER;
5181 }
5182 }
5183 return rc;
5184}
5185
5186
5187/**
5188 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5189 *
5190 * @param pDevExt Device globals.
5191 */
5192static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5193{
5194 pDevExt->pvVMMR0 = NULL;
5195 pDevExt->pfnVMMR0EntryInt = NULL;
5196 pDevExt->pfnVMMR0EntryFast = NULL;
5197 pDevExt->pfnVMMR0EntryEx = NULL;
5198}
5199
5200
5201/**
5202 * Adds a usage reference in the specified session of an image.
5203 *
5204 * Called while owning the loader semaphore.
5205 *
5206 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5207 * @param pSession Session in question.
5208 * @param pImage Image which the session is using.
5209 */
5210static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5211{
5212 PSUPDRVLDRUSAGE pUsage;
5213 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5214
5215 /*
5216 * Referenced it already?
5217 */
5218 pUsage = pSession->pLdrUsage;
5219 while (pUsage)
5220 {
5221 if (pUsage->pImage == pImage)
5222 {
5223 pUsage->cUsage++;
5224 return VINF_SUCCESS;
5225 }
5226 pUsage = pUsage->pNext;
5227 }
5228
5229 /*
5230 * Allocate new usage record.
5231 */
5232 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5233 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5234 pUsage->cUsage = 1;
5235 pUsage->pImage = pImage;
5236 pUsage->pNext = pSession->pLdrUsage;
5237 pSession->pLdrUsage = pUsage;
5238 return VINF_SUCCESS;
5239}
5240
5241
5242/**
5243 * Frees a load image.
5244 *
5245 * @param pDevExt Pointer to device extension.
5246 * @param pImage Pointer to the image we're gonna free.
5247 * This image must exit!
5248 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5249 */
5250static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5251{
5252 PSUPDRVLDRIMAGE pImagePrev;
5253 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5254
5255 /* find it - arg. should've used doubly linked list. */
5256 Assert(pDevExt->pLdrImages);
5257 pImagePrev = NULL;
5258 if (pDevExt->pLdrImages != pImage)
5259 {
5260 pImagePrev = pDevExt->pLdrImages;
5261 while (pImagePrev->pNext != pImage)
5262 pImagePrev = pImagePrev->pNext;
5263 Assert(pImagePrev->pNext == pImage);
5264 }
5265
5266 /* unlink */
5267 if (pImagePrev)
5268 pImagePrev->pNext = pImage->pNext;
5269 else
5270 pDevExt->pLdrImages = pImage->pNext;
5271
5272 /* check if this is VMMR0.r0 unset its entry point pointers. */
5273 if (pDevExt->pvVMMR0 == pImage->pvImage)
5274 supdrvLdrUnsetVMMR0EPs(pDevExt);
5275
5276 /* check for objects with destructors in this image. (Shouldn't happen.) */
5277 if (pDevExt->pObjs)
5278 {
5279 unsigned cObjs = 0;
5280 PSUPDRVOBJ pObj;
5281 RTSpinlockAcquire(pDevExt->Spinlock);
5282 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5283 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5284 {
5285 pObj->pfnDestructor = NULL;
5286 cObjs++;
5287 }
5288 RTSpinlockRelease(pDevExt->Spinlock);
5289 if (cObjs)
5290 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5291 }
5292
5293 /* call termination function if fully loaded. */
5294 if ( pImage->pfnModuleTerm
5295 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5296 {
5297 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5298 pImage->pfnModuleTerm(pImage);
5299 }
5300
5301 /* Inform the tracing component. */
5302 supdrvTracerModuleUnloading(pDevExt, pImage);
5303
5304 /* do native unload if appropriate. */
5305 if (pImage->fNative)
5306 supdrvOSLdrUnload(pDevExt, pImage);
5307
5308 /* free the image */
5309 pImage->cUsage = 0;
5310 pImage->pDevExt = NULL;
5311 pImage->pNext = NULL;
5312 pImage->uState = SUP_IOCTL_LDR_FREE;
5313 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5314 pImage->pvImageAlloc = NULL;
5315 RTMemFree(pImage->pachStrTab);
5316 pImage->pachStrTab = NULL;
5317 RTMemFree(pImage->paSymbols);
5318 pImage->paSymbols = NULL;
5319 RTMemFree(pImage);
5320}
5321
5322
5323/**
5324 * Acquires the loader lock.
5325 *
5326 * @returns IPRT status code.
5327 * @param pDevExt The device extension.
5328 */
5329DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5330{
5331#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5332 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5333#else
5334 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5335#endif
5336 AssertRC(rc);
5337 return rc;
5338}
5339
5340
5341/**
5342 * Releases the loader lock.
5343 *
5344 * @returns IPRT status code.
5345 * @param pDevExt The device extension.
5346 */
5347DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5348{
5349#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5350 return RTSemMutexRelease(pDevExt->mtxLdr);
5351#else
5352 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5353#endif
5354}
5355
5356
5357/**
5358 * Implements the service call request.
5359 *
5360 * @returns VBox status code.
5361 * @param pDevExt The device extension.
5362 * @param pSession The calling session.
5363 * @param pReq The request packet, valid.
5364 */
5365static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5366{
5367#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5368 int rc;
5369
5370 /*
5371 * Find the module first in the module referenced by the calling session.
5372 */
5373 rc = supdrvLdrLock(pDevExt);
5374 if (RT_SUCCESS(rc))
5375 {
5376 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5377 PSUPDRVLDRUSAGE pUsage;
5378
5379 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5380 if ( pUsage->pImage->pfnServiceReqHandler
5381 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5382 {
5383 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5384 break;
5385 }
5386 supdrvLdrUnlock(pDevExt);
5387
5388 if (pfnServiceReqHandler)
5389 {
5390 /*
5391 * Call it.
5392 */
5393 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5394 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5395 else
5396 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5397 }
5398 else
5399 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5400 }
5401
5402 /* log it */
5403 if ( RT_FAILURE(rc)
5404 && rc != VERR_INTERRUPTED
5405 && rc != VERR_TIMEOUT)
5406 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5407 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5408 else
5409 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5410 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5411 return rc;
5412#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5413 return VERR_NOT_IMPLEMENTED;
5414#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5415}
5416
5417
5418/**
5419 * Implements the logger settings request.
5420 *
5421 * @returns VBox status code.
5422 * @param pDevExt The device extension.
5423 * @param pSession The caller's session.
5424 * @param pReq The request.
5425 */
5426static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5427{
5428 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5429 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5430 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5431 PRTLOGGER pLogger = NULL;
5432 int rc;
5433
5434 /*
5435 * Some further validation.
5436 */
5437 switch (pReq->u.In.fWhat)
5438 {
5439 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5440 case SUPLOGGERSETTINGS_WHAT_CREATE:
5441 break;
5442
5443 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5444 if (*pszGroup || *pszFlags || *pszDest)
5445 return VERR_INVALID_PARAMETER;
5446 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5447 return VERR_ACCESS_DENIED;
5448 break;
5449
5450 default:
5451 return VERR_INTERNAL_ERROR;
5452 }
5453
5454 /*
5455 * Get the logger.
5456 */
5457 switch (pReq->u.In.fWhich)
5458 {
5459 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5460 pLogger = RTLogGetDefaultInstance();
5461 break;
5462
5463 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5464 pLogger = RTLogRelDefaultInstance();
5465 break;
5466
5467 default:
5468 return VERR_INTERNAL_ERROR;
5469 }
5470
5471 /*
5472 * Do the job.
5473 */
5474 switch (pReq->u.In.fWhat)
5475 {
5476 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5477 if (pLogger)
5478 {
5479 rc = RTLogFlags(pLogger, pszFlags);
5480 if (RT_SUCCESS(rc))
5481 rc = RTLogGroupSettings(pLogger, pszGroup);
5482 NOREF(pszDest);
5483 }
5484 else
5485 rc = VERR_NOT_FOUND;
5486 break;
5487
5488 case SUPLOGGERSETTINGS_WHAT_CREATE:
5489 {
5490 if (pLogger)
5491 rc = VERR_ALREADY_EXISTS;
5492 else
5493 {
5494 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5495
5496 rc = RTLogCreate(&pLogger,
5497 0 /* fFlags */,
5498 pszGroup,
5499 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5500 ? "VBOX_LOG"
5501 : "VBOX_RELEASE_LOG",
5502 RT_ELEMENTS(s_apszGroups),
5503 s_apszGroups,
5504 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5505 NULL);
5506 if (RT_SUCCESS(rc))
5507 {
5508 rc = RTLogFlags(pLogger, pszFlags);
5509 NOREF(pszDest);
5510 if (RT_SUCCESS(rc))
5511 {
5512 switch (pReq->u.In.fWhich)
5513 {
5514 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5515 pLogger = RTLogSetDefaultInstance(pLogger);
5516 break;
5517 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5518 pLogger = RTLogRelSetDefaultInstance(pLogger);
5519 break;
5520 }
5521 }
5522 RTLogDestroy(pLogger);
5523 }
5524 }
5525 break;
5526 }
5527
5528 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5529 switch (pReq->u.In.fWhich)
5530 {
5531 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5532 pLogger = RTLogSetDefaultInstance(NULL);
5533 break;
5534 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5535 pLogger = RTLogRelSetDefaultInstance(NULL);
5536 break;
5537 }
5538 rc = RTLogDestroy(pLogger);
5539 break;
5540
5541 default:
5542 {
5543 rc = VERR_INTERNAL_ERROR;
5544 break;
5545 }
5546 }
5547
5548 return rc;
5549}
5550
5551
5552/**
5553 * Implements the MSR prober operations.
5554 *
5555 * @returns VBox status code.
5556 * @param pDevExt The device extension.
5557 * @param pReq The request.
5558 */
5559static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5560{
5561#ifdef SUPDRV_WITH_MSR_PROBER
5562 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5563 int rc;
5564
5565 switch (pReq->u.In.enmOp)
5566 {
5567 case SUPMSRPROBEROP_READ:
5568 {
5569 uint64_t uValue;
5570 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5571 if (RT_SUCCESS(rc))
5572 {
5573 pReq->u.Out.uResults.Read.uValue = uValue;
5574 pReq->u.Out.uResults.Read.fGp = false;
5575 }
5576 else if (rc == VERR_ACCESS_DENIED)
5577 {
5578 pReq->u.Out.uResults.Read.uValue = 0;
5579 pReq->u.Out.uResults.Read.fGp = true;
5580 rc = VINF_SUCCESS;
5581 }
5582 break;
5583 }
5584
5585 case SUPMSRPROBEROP_WRITE:
5586 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5587 if (RT_SUCCESS(rc))
5588 pReq->u.Out.uResults.Write.fGp = false;
5589 else if (rc == VERR_ACCESS_DENIED)
5590 {
5591 pReq->u.Out.uResults.Write.fGp = true;
5592 rc = VINF_SUCCESS;
5593 }
5594 break;
5595
5596 case SUPMSRPROBEROP_MODIFY:
5597 case SUPMSRPROBEROP_MODIFY_FASTER:
5598 rc = supdrvOSMsrProberModify(idCpu, pReq);
5599 break;
5600
5601 default:
5602 return VERR_INVALID_FUNCTION;
5603 }
5604 return rc;
5605#else
5606 return VERR_NOT_IMPLEMENTED;
5607#endif
5608}
5609
5610
5611/**
5612 * Returns whether the host CPU sports an invariant TSC or not.
5613 *
5614 * @returns true if invariant TSC is supported, false otherwise.
5615 */
5616static bool supdrvIsInvariantTsc(void)
5617{
5618 static bool s_fQueried = false;
5619 static bool s_fIsInvariantTsc = false;
5620 if (!s_fQueried)
5621 {
5622 if (ASMHasCpuId())
5623 {
5624 uint32_t uEax, uEbx, uEcx, uEdx;
5625 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5626 if (uEax >= 0x80000007)
5627 {
5628 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5629 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5630 s_fIsInvariantTsc = true;
5631 }
5632 }
5633 s_fQueried = true;
5634 }
5635
5636 return s_fIsInvariantTsc;
5637}
5638
5639
5640#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5641/**
5642 * Switches the TSC-delta measurement thread into the butchered state.
5643 *
5644 * @returns VBox status code.
5645 * @param pDevExt Pointer to the device instance data.
5646 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5647 * @param pszFailed An error message to log.
5648 * @param rcFailed The error code to exit the thread with.
5649 */
5650static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5651{
5652 if (!fSpinlockHeld)
5653 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5654
5655 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5656 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5657 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5658 return rcFailed;
5659}
5660
5661
5662/**
5663 * The TSC-delta measurement thread.
5664 *
5665 * @returns VBox status code.
5666 * @param hThread The thread handle.
5667 * @param pvUser Opaque pointer to the device instance data.
5668 */
5669static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5670{
5671 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5672 static uint32_t cTimesMeasured = 0;
5673 uint32_t cConsecutiveTimeouts = 0;
5674 int rc = VERR_INTERNAL_ERROR_2;
5675 for (;;)
5676 {
5677 /*
5678 * Switch on the current state.
5679 */
5680 SUPDRVTSCDELTASTATE enmState;
5681 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5682 enmState = pDevExt->enmTscDeltaState;
5683 switch (enmState)
5684 {
5685 case kSupDrvTscDeltaState_Creating:
5686 {
5687 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5688 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5689 if (RT_FAILURE(rc))
5690 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5691 /* fall thru */
5692 }
5693
5694 case kSupDrvTscDeltaState_Listening:
5695 {
5696 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5697
5698 /* Simple adaptive timeout. */
5699 if (cConsecutiveTimeouts++ == 10)
5700 {
5701 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5702 pDevExt->cMsTscDeltaTimeout = 10;
5703 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5704 pDevExt->cMsTscDeltaTimeout = 100;
5705 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5706 pDevExt->cMsTscDeltaTimeout = 500;
5707 cConsecutiveTimeouts = 0;
5708 }
5709 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5710 if ( RT_FAILURE(rc)
5711 && rc != VERR_TIMEOUT)
5712 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5713 break;
5714 }
5715
5716 case kSupDrvTscDeltaState_WaitAndMeasure:
5717 {
5718 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5719 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5720 if (RT_FAILURE(rc))
5721 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5722 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5723 pDevExt->cMsTscDeltaTimeout = 1;
5724 RTThreadSleep(10);
5725 /* fall thru */
5726 }
5727
5728 case kSupDrvTscDeltaState_Measuring:
5729 {
5730 cConsecutiveTimeouts = 0;
5731 if (!cTimesMeasured++)
5732 {
5733 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5734 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5735 }
5736 else
5737 {
5738 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5739 unsigned iCpu;
5740
5741 if (cTimesMeasured == UINT32_MAX)
5742 cTimesMeasured = 1;
5743
5744 /* Measure TSC-deltas only for the CPUs that are in the set. */
5745 rc = VINF_SUCCESS;
5746 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5747 {
5748 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5749 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5750 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5751 {
5752 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5753 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5754 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5755 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5756 }
5757 }
5758 }
5759 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5760 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5761 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5762 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5763 pDevExt->rcTscDelta = rc;
5764 break;
5765 }
5766
5767 case kSupDrvTscDeltaState_Terminating:
5768 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5769 return VINF_SUCCESS;
5770
5771 case kSupDrvTscDeltaState_Butchered:
5772 default:
5773 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5774 }
5775 }
5776
5777 return rc;
5778}
5779
5780
5781/**
5782 * Waits for the TSC-delta measurement thread to respond to a state change.
5783 *
5784 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5785 * other error code on internal error.
5786 *
5787 * @param pThis Pointer to the grant service instance data.
5788 * @param enmCurState The current state.
5789 * @param enmNewState The new state we're waiting for it to enter.
5790 */
5791static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5792{
5793 /*
5794 * Wait a short while for the expected state transition.
5795 */
5796 int rc;
5797 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5798 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5799 if (pDevExt->enmTscDeltaState == enmNewState)
5800 {
5801 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5802 rc = VINF_SUCCESS;
5803 }
5804 else if (pDevExt->enmTscDeltaState == enmCurState)
5805 {
5806 /*
5807 * Wait longer if the state has not yet transitioned to the one we want.
5808 */
5809 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5810 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5811 if ( RT_SUCCESS(rc)
5812 || rc == VERR_TIMEOUT)
5813 {
5814 /*
5815 * Check the state whether we've succeeded.
5816 */
5817 SUPDRVTSCDELTASTATE enmState;
5818 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5819 enmState = pDevExt->enmTscDeltaState;
5820 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5821 if (enmState == enmNewState)
5822 rc = VINF_SUCCESS;
5823 else if (enmState == enmCurState)
5824 {
5825 rc = VERR_TIMEOUT;
5826 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5827 enmNewState));
5828 }
5829 else
5830 {
5831 rc = VERR_INTERNAL_ERROR;
5832 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5833 enmState, enmNewState));
5834 }
5835 }
5836 else
5837 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5838 }
5839 else
5840 {
5841 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5842 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5843 rc = VERR_INTERNAL_ERROR;
5844 }
5845
5846 return rc;
5847}
5848
5849
5850/**
5851 * Terminates the TSC-delta measurement thread.
5852 *
5853 * @param pDevExt Pointer to the device instance data.
5854 */
5855static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5856{
5857 int rc;
5858 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5859 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5860 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5861 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5862 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5863 if (RT_FAILURE(rc))
5864 {
5865 /* Signal a few more times before giving up. */
5866 int cTriesLeft = 5;
5867 while (--cTriesLeft > 0)
5868 {
5869 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5870 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5871 if (rc != VERR_TIMEOUT)
5872 break;
5873 }
5874 }
5875}
5876
5877
5878/**
5879 * Initializes and spawns the TSC-delta measurement thread.
5880 *
5881 * A thread is required for servicing re-measurement requests from events like
5882 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5883 * under all contexts on all OSs.
5884 *
5885 * @returns VBox status code.
5886 * @param pDevExt Pointer to the device instance data.
5887 *
5888 * @remarks Must only be called -after- initializing GIP and setting up MP
5889 * notifications!
5890 */
5891static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5892{
5893 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt->pGip));
5894
5895 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5896 if (RT_SUCCESS(rc))
5897 {
5898 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5899 if (RT_SUCCESS(rc))
5900 {
5901 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5902 pDevExt->cMsTscDeltaTimeout = 1;
5903 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5904 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5905 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5906 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5907 if (RT_SUCCESS(rc))
5908 {
5909 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5910 if (RT_SUCCESS(rc))
5911 {
5912 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5913 return rc;
5914 }
5915
5916 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5917 supdrvTscDeltaThreadTerminate(pDevExt);
5918 }
5919 else
5920 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5921 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5922 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5923 }
5924 else
5925 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5926 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5927 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5928 }
5929 else
5930 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5931
5932 return rc;
5933}
5934
5935
5936/**
5937 * Terminates the TSC-delta measurement thread and cleanup.
5938 *
5939 * @param pDevExt Pointer to the device instance data.
5940 */
5941static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5942{
5943 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5944 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5945 {
5946 supdrvTscDeltaThreadTerminate(pDevExt);
5947 }
5948
5949 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5950 {
5951 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5952 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5953 }
5954
5955 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5956 {
5957 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5958 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5959 }
5960
5961 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5962}
5963
5964
5965/**
5966 * Waits for TSC-delta measurements to be completed for all online CPUs.
5967 *
5968 * @returns VBox status code.
5969 * @param pDevExt Pointer to the device instance data.
5970 */
5971static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
5972{
5973 int cTriesLeft = 5;
5974 int cMsTotalWait;
5975 int cMsWaited = 0;
5976 int cMsWaitGranularity = 1;
5977
5978 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5979 AssertReturn(pGip, VERR_INVALID_POINTER);
5980
5981 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
5982 while (cTriesLeft-- > 0)
5983 {
5984 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
5985 return VINF_SUCCESS;
5986 RTThreadSleep(cMsWaitGranularity);
5987 cMsWaited += cMsWaitGranularity;
5988 if (cMsWaited >= cMsTotalWait)
5989 break;
5990 }
5991
5992 return VERR_TIMEOUT;
5993}
5994#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
5995
5996
5997/**
5998 * Measures the TSC frequency of the system.
5999 *
6000 * Uses a busy-wait method for the async. case as it is intended to help push
6001 * the CPU frequency up, while for the invariant cases using a sleeping method.
6002 *
6003 * The TSC frequency can vary on systems which are not reported as invariant.
6004 * On such systems the object of this function is to find out what the nominal,
6005 * maximum TSC frequency under 'normal' CPU operation.
6006 *
6007 * @returns VBox status code.
6008 * @param pDevExt Pointer to the device instance.
6009 *
6010 * @remarks Must be called only -after- measuring the TSC deltas.
6011 */
6012static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6013{
6014 int cTriesLeft = 4;
6015 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6016
6017 /* Assert order. */
6018 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6019 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6020
6021 while (cTriesLeft-- > 0)
6022 {
6023 RTCCUINTREG uFlags;
6024 uint64_t u64NanoTsBefore;
6025 uint64_t u64NanoTsAfter;
6026 uint64_t u64TscBefore;
6027 uint64_t u64TscAfter;
6028 uint8_t idApicBefore;
6029 uint8_t idApicAfter;
6030
6031 /*
6032 * Synchronize with the host OS clock tick before reading the TSC.
6033 * Especially important on older Windows version where the granularity is terrible.
6034 */
6035 u64NanoTsBefore = RTTimeSystemNanoTS();
6036 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6037 ASMNopPause();
6038
6039 uFlags = ASMIntDisableFlags();
6040 idApicBefore = ASMGetApicId();
6041 u64TscBefore = ASMReadTSC();
6042 u64NanoTsBefore = RTTimeSystemNanoTS();
6043 ASMSetFlags(uFlags);
6044
6045 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6046 {
6047 /*
6048 * Sleep-wait since the TSC frequency is constant, it eases host load.
6049 * Shorter interval produces more variance in the frequency (esp. Windows).
6050 */
6051 RTThreadSleep(200);
6052 u64NanoTsAfter = RTTimeSystemNanoTS();
6053 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6054 ASMNopPause();
6055 u64NanoTsAfter = RTTimeSystemNanoTS();
6056 }
6057 else
6058 {
6059 /* Busy-wait keeping the frequency up and measure. */
6060 for (;;)
6061 {
6062 u64NanoTsAfter = RTTimeSystemNanoTS();
6063 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6064 ASMNopPause();
6065 else
6066 break;
6067 }
6068 }
6069
6070 uFlags = ASMIntDisableFlags();
6071 idApicAfter = ASMGetApicId();
6072 u64TscAfter = ASMReadTSC();
6073 ASMSetFlags(uFlags);
6074
6075 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6076 {
6077 int rc;
6078 bool fAppliedBefore;
6079 bool fAppliedAfter;
6080 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6081 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6082
6083 if ( !fAppliedBefore
6084 || !fAppliedAfter)
6085 {
6086#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6087 /*
6088 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6089 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6090 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6091 * proceed. This should be triggered just once if we're rather unlucky.
6092 */
6093 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6094 if (rc == VERR_TIMEOUT)
6095 {
6096 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6097 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6098 }
6099#else
6100 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6101 idApicBefore, idApicAfter, cTriesLeft);
6102#endif
6103 continue;
6104 }
6105 }
6106
6107 /*
6108 * Update GIP.
6109 */
6110 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6111 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6112 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6113 return VINF_SUCCESS;
6114 }
6115
6116 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6117}
6118
6119
6120/**
6121 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6122 *
6123 * @param pTimer The timer.
6124 * @param pvUser Opaque pointer to the device instance data.
6125 * @param iTick The timer tick.
6126 */
6127static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6128{
6129 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6130 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6131 bool fDeltaApplied = false;
6132 uint8_t idApic;
6133 uint64_t u64DeltaNanoTS;
6134 uint64_t u64DeltaTsc;
6135 uint64_t u64NanoTS;
6136 uint64_t u64Tsc;
6137 RTCCUINTREG uFlags;
6138
6139 /* Paranoia. */
6140 Assert(pGip);
6141 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6142
6143#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6144 u64NanoTS = RTTimeSystemNanoTS();
6145 while (RTTimeSystemNanoTS() == u64NanoTS)
6146 ASMNopPause();
6147#endif
6148 uFlags = ASMIntDisableFlags();
6149 idApic = ASMGetApicId();
6150 u64Tsc = ASMReadTSC();
6151 u64NanoTS = RTTimeSystemNanoTS();
6152 ASMSetFlags(uFlags);
6153 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6154 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6155 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6156 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6157
6158 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6159 && !fDeltaApplied)
6160 {
6161 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6162 GIP_TSC_REFINE_INTERVAL);
6163 return;
6164 }
6165
6166 /* Calculate the TSC frequency. */
6167 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6168 && u64DeltaNanoTS < UINT32_MAX)
6169 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6170 else
6171 {
6172#if 1
6173 RTUINT128U CpuHz, Tmp, Divisor;
6174 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6175 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6176 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6177 pGip->u64CpuHz = CpuHz.s.Lo;
6178#else
6179 /** @todo remove later */
6180 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6181 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6182 && u64DeltaNanoTS / 10 < UINT32_MAX)
6183 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, (uint32_t)(u64DeltaNanoTS / 10));
6184 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6185 && u64DeltaNanoTS / 100 < UINT32_MAX)
6186 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, (uint32_t)(u64DeltaNanoTS / 100));
6187 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6188 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6189 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, (uint32_t)(u64DeltaNanoTS / 1000));
6190 else /* Screw it. */
6191 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6192#endif
6193 }
6194
6195 /* Update rest of GIP. */
6196 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6197 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6198}
6199
6200
6201/**
6202 * Starts the TSC-frequency refinement phase asynchronously.
6203 *
6204 * @param pDevExt Pointer to the device instance data.
6205 */
6206static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6207{
6208 uint64_t u64NanoTS;
6209 RTCCUINTREG uFlags;
6210 uint8_t idApic;
6211 int rc;
6212 bool fDeltaApplied = false;
6213 PSUPGLOBALINFOPAGE pGip;
6214
6215 /* Validate. */
6216 Assert(pDevExt);
6217 Assert(pDevExt->pGip);
6218
6219 pGip = pDevExt->pGip;
6220 u64NanoTS = RTTimeSystemNanoTS();
6221 while (RTTimeSystemNanoTS() == u64NanoTS)
6222 ASMNopPause();
6223 uFlags = ASMIntDisableFlags();
6224 idApic = ASMGetApicId();
6225 pDevExt->u64TscAnchor = ASMReadTSC();
6226 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
6227 ASMSetFlags(uFlags);
6228 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6229 SUPTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, &fDeltaApplied);
6230
6231#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6232 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6233 && !fDeltaApplied)
6234 {
6235 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6236 if (rc == VERR_TIMEOUT)
6237 {
6238 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6239 return;
6240 }
6241 }
6242#endif
6243
6244 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
6245 if (RT_SUCCESS(rc))
6246 {
6247 /*
6248 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6249 * interval as small as possible while gaining the most consistent and accurate frequency
6250 * (compared to what the host OS might have measured).
6251 *
6252 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6253 * same TSC frequency whenever possible so we need to keep the interval short.
6254 */
6255 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6256 AssertRC(rc);
6257 }
6258 else
6259 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6260}
6261
6262
6263/**
6264 * Creates the GIP.
6265 *
6266 * @returns VBox status code.
6267 * @param pDevExt Instance data. GIP stuff may be updated.
6268 */
6269static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6270{
6271 PSUPGLOBALINFOPAGE pGip;
6272 RTHCPHYS HCPhysGip;
6273 uint32_t u32SystemResolution;
6274 uint32_t u32Interval;
6275 uint32_t u32MinInterval;
6276 uint32_t uMod;
6277 unsigned cCpus;
6278 int rc;
6279
6280 LogFlow(("supdrvGipCreate:\n"));
6281
6282 /* Assert order. */
6283 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6284 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6285 Assert(!pDevExt->pGipTimer);
6286
6287 /*
6288 * Check the CPU count.
6289 */
6290 cCpus = RTMpGetArraySize();
6291 if ( cCpus > RTCPUSET_MAX_CPUS
6292 || cCpus > 256 /* ApicId is used for the mappings */)
6293 {
6294 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6295 return VERR_TOO_MANY_CPUS;
6296 }
6297
6298 /*
6299 * Allocate a contiguous set of pages with a default kernel mapping.
6300 */
6301 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6302 if (RT_FAILURE(rc))
6303 {
6304 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6305 return rc;
6306 }
6307 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6308 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6309
6310 /*
6311 * Allocate the TSC-delta sync struct on a separate cache line.
6312 */
6313 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6314 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6315 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6316
6317 /*
6318 * Find a reasonable update interval and initialize the structure.
6319 */
6320 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6321 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6322 * See @bugref{6710}. */
6323 u32MinInterval = RT_NS_10MS;
6324 u32SystemResolution = RTTimerGetSystemGranularity();
6325 u32Interval = u32MinInterval;
6326 uMod = u32MinInterval % u32SystemResolution;
6327 if (uMod)
6328 u32Interval += u32SystemResolution - uMod;
6329
6330 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6331
6332 if (RT_UNLIKELY( pGip->fOsTscDeltasInSync
6333 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6334 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6335 {
6336 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6337 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6338 return VERR_INTERNAL_ERROR_2;
6339 }
6340
6341#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6342 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6343 {
6344 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6345 rc = supdrvTscDeltaThreadInit(pDevExt);
6346 }
6347#endif
6348 if (RT_SUCCESS(rc))
6349 {
6350 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6351 if (RT_SUCCESS(rc))
6352 {
6353 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6354 if (RT_SUCCESS(rc))
6355 {
6356 uint16_t iCpu;
6357#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6358 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6359 {
6360 /*
6361 * Measure the TSC deltas now that we have MP notifications.
6362 */
6363 int cTries = 5;
6364 do
6365 {
6366 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6367 if (rc != VERR_TRY_AGAIN)
6368 break;
6369 } while (--cTries > 0);
6370 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6371 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6372 }
6373 else
6374 {
6375 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6376 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
6377 }
6378#endif
6379 if (RT_SUCCESS(rc))
6380 {
6381 rc = supdrvGipMeasureTscFreq(pDevExt);
6382 if (RT_SUCCESS(rc))
6383 {
6384 /*
6385 * Create the timer.
6386 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6387 */
6388 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6389 {
6390 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6391 pDevExt);
6392 if (rc == VERR_NOT_SUPPORTED)
6393 {
6394 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6395 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6396 }
6397 }
6398 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6399 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6400 if (RT_SUCCESS(rc))
6401 {
6402 /*
6403 * We're good.
6404 */
6405 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6406 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6407
6408 g_pSUPGlobalInfoPage = pGip;
6409 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6410 supdrvRefineTscFreq(pDevExt);
6411 return VINF_SUCCESS;
6412 }
6413
6414 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6415 Assert(!pDevExt->pGipTimer);
6416 }
6417 else
6418 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6419 }
6420 else
6421 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6422 }
6423 else
6424 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6425 }
6426 else
6427 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6428 }
6429 else
6430 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6431
6432 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6433 return rc;
6434}
6435
6436
6437/**
6438 * Terminates the GIP.
6439 *
6440 * @param pDevExt Instance data. GIP stuff may be updated.
6441 */
6442static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6443{
6444 int rc;
6445#ifdef DEBUG_DARWIN_GIP
6446 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6447 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6448 pDevExt->pGipTimer, pDevExt->GipMemObj));
6449#endif
6450
6451 /*
6452 * Stop receiving MP notifications before tearing anything else down.
6453 */
6454 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6455
6456#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6457 /*
6458 * Terminate the TSC-delta measurement thread and resources.
6459 */
6460 supdrvTscDeltaTerm(pDevExt);
6461#endif
6462
6463 /*
6464 * Destroy the TSC-refinement one-shot timer.
6465 */
6466 if (pDevExt->pTscRefineTimer)
6467 {
6468 RTTimerDestroy(pDevExt->pTscRefineTimer);
6469 pDevExt->pTscRefineTimer = NULL;
6470 }
6471
6472 if (pDevExt->pvTscDeltaSync)
6473 {
6474 RTMemFree(pDevExt->pvTscDeltaSync);
6475 pDevExt->pTscDeltaSync = NULL;
6476 pDevExt->pvTscDeltaSync = NULL;
6477 }
6478
6479 /*
6480 * Invalid the GIP data.
6481 */
6482 if (pDevExt->pGip)
6483 {
6484 supdrvGipTerm(pDevExt->pGip);
6485 pDevExt->pGip = NULL;
6486 }
6487 g_pSUPGlobalInfoPage = NULL;
6488
6489 /*
6490 * Destroy the timer and free the GIP memory object.
6491 */
6492 if (pDevExt->pGipTimer)
6493 {
6494 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6495 pDevExt->pGipTimer = NULL;
6496 }
6497
6498 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6499 {
6500 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6501 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6502 }
6503
6504 /*
6505 * Finally, make sure we've release the system timer resolution request
6506 * if one actually succeeded and is still pending.
6507 */
6508 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6509}
6510
6511
6512/**
6513 * Timer callback function sync GIP mode.
6514 * @param pTimer The timer.
6515 * @param pvUser Opaque pointer to the device extension.
6516 * @param iTick The timer tick.
6517 */
6518static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6519{
6520 RTCCUINTREG uFlags;
6521 uint64_t u64TSC;
6522 uint64_t u64NanoTS;
6523 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6524 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6525
6526 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6527 u64TSC = ASMReadTSC();
6528 u64NanoTS = RTTimeSystemNanoTS();
6529
6530 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6531 {
6532 /*
6533 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6534 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6535 * affected a bit until we get proper TSC deltas than implementing options like
6536 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6537 *
6538 * The likely hood of this happening is really low. On Windows, Linux timers
6539 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6540 */
6541 Assert(!ASMIntAreEnabled());
6542 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6543 }
6544
6545 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6546
6547 ASMSetFlags(uFlags);
6548}
6549
6550
6551/**
6552 * Timer callback function for async GIP mode.
6553 * @param pTimer The timer.
6554 * @param pvUser Opaque pointer to the device extension.
6555 * @param iTick The timer tick.
6556 */
6557static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6558{
6559 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6560 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6561 RTCPUID idCpu = RTMpCpuId();
6562 uint64_t u64TSC = ASMReadTSC();
6563 uint64_t NanoTS = RTTimeSystemNanoTS();
6564
6565 /** @todo reset the transaction number and whatnot when iTick == 1. */
6566 if (pDevExt->idGipMaster == idCpu)
6567 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6568 else
6569 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6570
6571 ASMSetFlags(fOldFlags);
6572}
6573
6574
6575/**
6576 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6577 *
6578 * @returns Index of the CPU in the cache set.
6579 * @param pGip The GIP.
6580 * @param idCpu The CPU ID.
6581 */
6582static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6583{
6584 uint32_t i, cTries;
6585
6586 /*
6587 * ASSUMES that CPU IDs are constant.
6588 */
6589 for (i = 0; i < pGip->cCpus; i++)
6590 if (pGip->aCPUs[i].idCpu == idCpu)
6591 return i;
6592
6593 cTries = 0;
6594 do
6595 {
6596 for (i = 0; i < pGip->cCpus; i++)
6597 {
6598 bool fRc;
6599 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6600 if (fRc)
6601 return i;
6602 }
6603 } while (cTries++ < 32);
6604 AssertReleaseFailed();
6605 return i - 1;
6606}
6607
6608
6609/**
6610 * The calling CPU should be accounted as online, update GIP accordingly.
6611 *
6612 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6613 *
6614 * @param pDevExt The device extension.
6615 * @param idCpu The CPU ID.
6616 */
6617static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6618{
6619 int iCpuSet = 0;
6620 uint16_t idApic = UINT16_MAX;
6621 uint32_t i = 0;
6622 uint64_t u64NanoTS = 0;
6623 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6624
6625 AssertPtrReturnVoid(pGip);
6626 AssertRelease(idCpu == RTMpCpuId());
6627 Assert(pGip->cPossibleCpus == RTMpGetCount());
6628
6629 /*
6630 * Do this behind a spinlock with interrupts disabled as this can fire
6631 * on all CPUs simultaneously, see @bugref{6110}.
6632 */
6633 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6634
6635 /*
6636 * Update the globals.
6637 */
6638 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6639 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6640 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6641 if (iCpuSet >= 0)
6642 {
6643 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6644 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6645 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6646 }
6647
6648 /*
6649 * Update the entry.
6650 */
6651 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6652 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6653 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
6654 idApic = ASMGetApicId();
6655 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6656 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6657 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6658
6659 /*
6660 * Update the APIC ID and CPU set index mappings.
6661 */
6662 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6663 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6664
6665 /* Update the Mp online/offline counter. */
6666 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6667
6668#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6669 /*
6670 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6671 *
6672 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6673 * update the state and it'll get serviced when the thread's listening interval times out.
6674 */
6675 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6676 {
6677 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6678 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6679 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6680 {
6681 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6682 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6683 }
6684 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6685 }
6686#endif
6687
6688 /* commit it */
6689 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6690
6691 RTSpinlockRelease(pDevExt->hGipSpinlock);
6692}
6693
6694
6695/**
6696 * The CPU should be accounted as offline, update the GIP accordingly.
6697 *
6698 * This is used by supdrvGipMpEvent.
6699 *
6700 * @param pDevExt The device extension.
6701 * @param idCpu The CPU ID.
6702 */
6703static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6704{
6705 int iCpuSet;
6706 unsigned i;
6707
6708 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6709
6710 AssertPtrReturnVoid(pGip);
6711 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6712
6713 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6714 AssertReturnVoid(iCpuSet >= 0);
6715
6716 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6717 AssertReturnVoid(i < pGip->cCpus);
6718 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6719
6720 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6721 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6722
6723 /* Update the Mp online/offline counter. */
6724 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
6725
6726 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6727 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
6728 {
6729 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6730 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6731 }
6732
6733 /* Reset the TSC delta, we will recalculate it lazily. */
6734 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6735 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6736
6737#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6738 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6739 if (supdrvIsInvariantTsc())
6740 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6741#endif
6742
6743 /* commit it */
6744 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6745
6746 RTSpinlockRelease(pDevExt->hGipSpinlock);
6747}
6748
6749
6750/**
6751 * Multiprocessor event notification callback.
6752 *
6753 * This is used to make sure that the GIP master gets passed on to
6754 * another CPU. It also updates the associated CPU data.
6755 *
6756 * @param enmEvent The event.
6757 * @param idCpu The cpu it applies to.
6758 * @param pvUser Pointer to the device extension.
6759 *
6760 * @remarks This function -must- fire on the newly online'd CPU for the
6761 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6762 * RTMPEVENT_OFFLINE case.
6763 */
6764static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6765{
6766 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6767 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6768
6769 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6770
6771 /*
6772 * Update the GIP CPU data.
6773 */
6774 if (pGip)
6775 {
6776 switch (enmEvent)
6777 {
6778 case RTMPEVENT_ONLINE:
6779 AssertRelease(idCpu == RTMpCpuId());
6780 supdrvGipMpEventOnline(pDevExt, idCpu);
6781 break;
6782 case RTMPEVENT_OFFLINE:
6783 supdrvGipMpEventOffline(pDevExt, idCpu);
6784 break;
6785 }
6786 }
6787
6788 /*
6789 * Make sure there is a master GIP.
6790 */
6791 if (enmEvent == RTMPEVENT_OFFLINE)
6792 {
6793 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6794 if (idGipMaster == idCpu)
6795 {
6796 /*
6797 * Find a new GIP master.
6798 */
6799 bool fIgnored;
6800 unsigned i;
6801 int64_t iTSCDelta;
6802 uint32_t idxNewGipMaster;
6803 RTCPUID idNewGipMaster = NIL_RTCPUID;
6804 RTCPUSET OnlineCpus;
6805 RTMpGetOnlineSet(&OnlineCpus);
6806
6807 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6808 {
6809 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6810 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6811 && idCurCpu != idGipMaster)
6812 {
6813 idNewGipMaster = idCurCpu;
6814 break;
6815 }
6816 }
6817
6818 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6819 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6820 NOREF(fIgnored);
6821
6822 /*
6823 * Adjust all the TSC deltas against the new GIP master.
6824 */
6825 if (pGip)
6826 {
6827 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6828 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6829 Assert(iTSCDelta != INT64_MAX);
6830 for (i = 0; i < pGip->cCpus; i++)
6831 {
6832 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6833 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6834 if (iWorkerDelta != INT64_MAX)
6835 iWorkerDelta -= iTSCDelta;
6836 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6837 }
6838 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6839 }
6840 }
6841 }
6842}
6843
6844
6845/**
6846 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6847 * compute the delta between them.
6848 *
6849 * @param idCpu The CPU we are current scheduled on.
6850 * @param pvUser1 Opaque pointer to the device instance data.
6851 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6852 *
6853 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6854 * read the TSC at exactly the same time on both the master and the worker
6855 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6856 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6857 * try to minimize the measurement error by computing the minimum read time
6858 * of the compare statement in the worker by taking TSC measurements across
6859 * it.
6860 *
6861 * We ignore the first few runs of the loop in order to prime the cache.
6862 * Also, be careful about using 'pause' instruction in critical busy-wait
6863 * loops in this code - it can cause undesired behaviour with
6864 * hyperthreading.
6865 *
6866 * It must be noted that the computed minimum read time is mostly to
6867 * eliminate huge deltas when the worker is too early and doesn't by itself
6868 * help produce more accurate deltas. We allow two times the computed
6869 * minimum as an arbibtrary acceptable threshold. Therefore, it is still
6870 * possible to get negative deltas where there are none when the worker is
6871 * earlier. As long as these occasional negative deltas are lower than the
6872 * time it takes to exit guest-context and the OS to reschedule EMT on a
6873 * different CPU we won't expose a TSC that jumped backwards. It is because
6874 * of the existence of the negative deltas we don't recompute the delta with
6875 * the master and worker interchanged to eliminate the remaining measurement
6876 * error.
6877 */
6878static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6879{
6880 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
6881 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6882 uint32_t *pidWorker = (uint32_t *)pvUser2;
6883 RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
6884 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6885 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6886 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6887 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6888 int cTriesLeft = 12;
6889
6890 if ( idCpu != idMaster
6891 && idCpu != *pidWorker)
6892 return;
6893
6894 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6895 with a timeout to avoid deadlocking the entire system. */
6896 if (!RTMpOnAllIsConcurrentSafe())
6897 {
6898 /** @todo This was introduced for Windows, but since Windows doesn't use this
6899 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6900 * see @bugref{6710} comment 81), eventually phase it out. */
6901 uint64_t uTscNow;
6902 uint64_t uTscStart;
6903 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6904
6905 ASMSerializeInstruction();
6906 uTscStart = ASMReadTSC();
6907 if (idCpu == idMaster)
6908 {
6909 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6910 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6911 {
6912 ASMSerializeInstruction();
6913 uTscNow = ASMReadTSC();
6914 if (uTscNow - uTscStart > cWaitTicks)
6915 {
6916 /* Set the worker delta to indicate failure, not the master. */
6917 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6918 return;
6919 }
6920
6921 ASMNopPause();
6922 }
6923 }
6924 else
6925 {
6926 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6927 {
6928 ASMSerializeInstruction();
6929 uTscNow = ASMReadTSC();
6930 if (uTscNow - uTscStart > cWaitTicks)
6931 {
6932 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6933 return;
6934 }
6935
6936 ASMNopPause();
6937 }
6938 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6939 }
6940 }
6941
6942 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6943 while (cTriesLeft-- > 0)
6944 {
6945 unsigned i;
6946 uint64_t uMinCmpReadTime = UINT64_MAX;
6947 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6948 {
6949 if (idCpu == idMaster)
6950 {
6951 /*
6952 * The master.
6953 */
6954 RTCCUINTREG uFlags;
6955 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6956 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6957
6958 /* Disable interrupts only in the master for as short a period
6959 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6960 uFlags = ASMIntDisableFlags();
6961
6962 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6963 ;
6964
6965 do
6966 {
6967 ASMSerializeInstruction();
6968 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6969 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6970
6971 ASMSetFlags(uFlags);
6972
6973 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6974 ;
6975
6976 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6977 {
6978 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6979 {
6980 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6981 if (iDelta < pGipCpuWorker->i64TSCDelta)
6982 pGipCpuWorker->i64TSCDelta = iDelta;
6983 }
6984 }
6985
6986 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6987 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6988 }
6989 else
6990 {
6991 /*
6992 * The worker.
6993 */
6994 uint64_t uTscWorker;
6995 uint64_t uTscWorkerFlushed;
6996 uint64_t uCmpReadTime;
6997
6998 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
6999 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
7000 ;
7001 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7002 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
7003
7004 /*
7005 * Keep reading the TSC until we notice that the master has read his. Reading
7006 * the TSC -after- the master has updated the memory is way too late. We thus
7007 * compensate by trying to measure how long it took for the worker to notice
7008 * the memory flushed from the master.
7009 */
7010 do
7011 {
7012 ASMSerializeInstruction();
7013 uTscWorker = ASMReadTSC();
7014 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7015 ASMSerializeInstruction();
7016 uTscWorkerFlushed = ASMReadTSC();
7017
7018 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7019 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7020 {
7021 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
7022 if (uCmpReadTime < (uMinCmpReadTime << 1))
7023 {
7024 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7025 if (uCmpReadTime < uMinCmpReadTime)
7026 uMinCmpReadTime = uCmpReadTime;
7027 }
7028 else
7029 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7030 }
7031 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7032 {
7033 if (uCmpReadTime < uMinCmpReadTime)
7034 uMinCmpReadTime = uCmpReadTime;
7035 }
7036
7037 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7038 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7039 ASMNopPause();
7040 }
7041 }
7042
7043 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7044 break;
7045 }
7046}
7047
7048
7049/**
7050 * Clears TSC delta related variables.
7051 *
7052 * Clears all TSC samples as well as the delta synchronization variable on the
7053 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
7054 *
7055 * @param pDevExt Pointer to the device instance data.
7056 * @param fClearDeltas Whether the deltas are also to be cleared.
7057 */
7058DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7059{
7060 unsigned iCpu;
7061 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7062 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7063 {
7064 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7065 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7066 if (fClearDeltas)
7067 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7068 }
7069 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7070}
7071
7072
7073/**
7074 * Measures the TSC delta between the master GIP CPU and one specified worker
7075 * CPU.
7076 *
7077 * @returns VBox status code.
7078 * @param pDevExt Pointer to the device instance data.
7079 * @param idxWorker The index of the worker CPU from the GIP's array of
7080 * CPUs.
7081 *
7082 * @remarks This can be called with preemption disabled!
7083 */
7084static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7085{
7086 int rc;
7087 PSUPGLOBALINFOPAGE pGip;
7088 PSUPGIPCPU pGipCpuWorker;
7089 RTCPUID idMaster;
7090
7091 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7092 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7093
7094 pGip = pDevExt->pGip;
7095 idMaster = pDevExt->idGipMaster;
7096 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7097
7098 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7099
7100 if (pGipCpuWorker->idCpu == idMaster)
7101 {
7102 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7103 return VINF_SUCCESS;
7104 }
7105
7106 /* Set the master TSC as the initiator. */
7107 while (ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7108 {
7109 /*
7110 * Sleep here rather than spin as there is a parallel measurement
7111 * being executed and that can take a good while to be done.
7112 */
7113 RTThreadSleep(1);
7114 }
7115
7116 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7117 {
7118 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7119 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7120 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7121 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, &pGipCpuWorker->idCpu);
7122 if (RT_SUCCESS(rc))
7123 {
7124 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7125 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7126 }
7127 }
7128 else
7129 rc = VERR_CPU_OFFLINE;
7130
7131 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
7132 return rc;
7133}
7134
7135
7136/**
7137 * Measures the TSC deltas between CPUs.
7138 *
7139 * @param pDevExt Pointer to the device instance data.
7140 * @param pidxMaster Where to store the index of the chosen master TSC if we
7141 * managed to determine the TSC deltas successfully.
7142 * Optional, can be NULL.
7143 *
7144 * @returns VBox status code.
7145 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7146 * idCpu, GIP's online CPU set which are populated in
7147 * supdrvGipInitOnCpu().
7148 */
7149static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7150{
7151 PSUPGIPCPU pGipCpuMaster;
7152 unsigned iCpu;
7153 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7154 uint32_t idxMaster = UINT32_MAX;
7155 int rc = VINF_SUCCESS;
7156 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7157 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7158
7159 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7160
7161 /*
7162 * Pick the first CPU online as the master TSC and make it the new GIP master based
7163 * on the APIC ID.
7164 *
7165 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7166 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7167 * master as this point since the sync/async timer isn't created yet.
7168 */
7169 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7170 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7171 {
7172 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7173 if (idxCpu != UINT16_MAX)
7174 {
7175 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7176 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7177 {
7178 idxMaster = idxCpu;
7179 pGipCpu->i64TSCDelta = 0;
7180 break;
7181 }
7182 }
7183 }
7184 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7185 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7186 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7187
7188 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7189 if (pGip->cOnlineCpus <= 1)
7190 {
7191 if (pidxMaster)
7192 *pidxMaster = idxMaster;
7193 return VINF_SUCCESS;
7194 }
7195
7196 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7197 {
7198 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7199 if ( iCpu != idxMaster
7200 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7201 {
7202 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7203 if (RT_FAILURE(rc))
7204 {
7205 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7206 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7207 break;
7208 }
7209
7210 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7211 {
7212 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7213 rc = VERR_TRY_AGAIN;
7214 break;
7215 }
7216 }
7217 }
7218
7219 if ( RT_SUCCESS(rc)
7220 && !pGipCpuMaster->i64TSCDelta
7221 && pidxMaster)
7222 {
7223 *pidxMaster = idxMaster;
7224 }
7225 return rc;
7226}
7227
7228
7229/**
7230 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7231 *
7232 * @param idCpu Ignored.
7233 * @param pvUser1 Where to put the TSC.
7234 * @param pvUser2 Ignored.
7235 */
7236static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7237{
7238 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7239}
7240
7241
7242/**
7243 * Determine if Async GIP mode is required because of TSC drift.
7244 *
7245 * When using the default/normal timer code it is essential that the time stamp counter
7246 * (TSC) runs never backwards, that is, a read operation to the counter should return
7247 * a bigger value than any previous read operation. This is guaranteed by the latest
7248 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7249 * case we have to choose the asynchronous timer mode.
7250 *
7251 * @param poffMin Pointer to the determined difference between different
7252 * cores (optional, can be NULL).
7253 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7254 */
7255static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7256{
7257 /*
7258 * Just iterate all the cpus 8 times and make sure that the TSC is
7259 * ever increasing. We don't bother taking TSC rollover into account.
7260 */
7261 int iEndCpu = RTMpGetArraySize();
7262 int iCpu;
7263 int cLoops = 8;
7264 bool fAsync = false;
7265 int rc = VINF_SUCCESS;
7266 uint64_t offMax = 0;
7267 uint64_t offMin = ~(uint64_t)0;
7268 uint64_t PrevTsc = ASMReadTSC();
7269
7270 while (cLoops-- > 0)
7271 {
7272 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7273 {
7274 uint64_t CurTsc;
7275 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7276 if (RT_SUCCESS(rc))
7277 {
7278 if (CurTsc <= PrevTsc)
7279 {
7280 fAsync = true;
7281 offMin = offMax = PrevTsc - CurTsc;
7282 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7283 iCpu, cLoops, CurTsc, PrevTsc));
7284 break;
7285 }
7286
7287 /* Gather statistics (except the first time). */
7288 if (iCpu != 0 || cLoops != 7)
7289 {
7290 uint64_t off = CurTsc - PrevTsc;
7291 if (off < offMin)
7292 offMin = off;
7293 if (off > offMax)
7294 offMax = off;
7295 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7296 }
7297
7298 /* Next */
7299 PrevTsc = CurTsc;
7300 }
7301 else if (rc == VERR_NOT_SUPPORTED)
7302 break;
7303 else
7304 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7305 }
7306
7307 /* broke out of the loop. */
7308 if (iCpu < iEndCpu)
7309 break;
7310 }
7311
7312 if (poffMin)
7313 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7314 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7315 fAsync, iEndCpu, rc, offMin, offMax));
7316#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7317 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7318#endif
7319 return fAsync;
7320}
7321
7322
7323/**
7324 * Determine the GIP TSC mode.
7325 *
7326 * @returns The most suitable TSC mode.
7327 * @param pDevExt Pointer to the device instance data.
7328 */
7329static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7330{
7331 /* Trust CPUs that declare their TSC to be invariant. */
7332#if 0 /** @todo this cannot be enabled until Michal's AMD laptop with insane deltas are working. */
7333 if (supdrvIsInvariantTsc())
7334 return SUPGIPMODE_INVARIANT_TSC;
7335#endif
7336
7337 /*
7338 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7339 * (1) There might be a skew between the CPU, so that cpu0
7340 * returns a TSC that is slightly different from cpu1.
7341 * (2) Power management (and other things) may cause the TSC
7342 * to run at a non-constant speed, and cause the speed
7343 * to be different on the cpus. This will result in (1).
7344 *
7345 * So, on SMP systems we'll have to select the ASYNC update method
7346 * if there are symptoms of these problems.
7347 */
7348 if (RTMpGetCount() > 1)
7349 {
7350 uint32_t uEAX, uEBX, uECX, uEDX;
7351 uint64_t u64DiffCoresIgnored;
7352
7353 /* Permit the user and/or the OS specific bits to force async mode. */
7354 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7355 return SUPGIPMODE_ASYNC_TSC;
7356
7357 /* Try check for current differences between the cpus. */
7358 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7359 return SUPGIPMODE_ASYNC_TSC;
7360
7361 /*
7362 * If the CPU supports power management and is an AMD one we
7363 * won't trust it unless it has the TscInvariant bit is set.
7364 */
7365 /** @todo this is now redundant. remove later. */
7366 /* Check for "AuthenticAMD" */
7367 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7368 if ( uEAX >= 1
7369 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7370 {
7371 /* Check for APM support and that TscInvariant is cleared. */
7372 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7373 if (uEAX >= 0x80000007)
7374 {
7375 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7376 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7377 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7378 return SUPGIPMODE_ASYNC_TSC;
7379 }
7380 }
7381 }
7382
7383 /** @todo later remove this when the above todo with AMD laptop is done (i.e.
7384 * TSC deltas handled everywhere). */
7385 if (supdrvIsInvariantTsc())
7386 return SUPGIPMODE_INVARIANT_TSC;
7387 return SUPGIPMODE_SYNC_TSC;
7388}
7389
7390
7391/**
7392 * Initializes per-CPU GIP information.
7393 *
7394 * @param pDevExt Pointer to the device instance data.
7395 * @param pGip Pointer to the GIP.
7396 * @param pCpu Pointer to which GIP CPU to initalize.
7397 * @param u64NanoTS The current nanosecond timestamp.
7398 */
7399static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7400{
7401 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7402 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7403 pCpu->u32TransactionId = 2;
7404 pCpu->u64NanoTS = u64NanoTS;
7405 pCpu->u64TSC = ASMReadTSC();
7406 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7407 pCpu->i64TSCDelta = GIP_ARE_TSC_DELTAS_APPLICABLE(pGip) ? INT64_MAX : 0;
7408
7409 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7410 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7411 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7412 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7413
7414 /*
7415 * We don't know the following values until we've executed updates.
7416 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7417 * the 2nd timer callout.
7418 */
7419 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7420 pCpu->u32UpdateIntervalTSC
7421 = pCpu->au32TSCHistory[0]
7422 = pCpu->au32TSCHistory[1]
7423 = pCpu->au32TSCHistory[2]
7424 = pCpu->au32TSCHistory[3]
7425 = pCpu->au32TSCHistory[4]
7426 = pCpu->au32TSCHistory[5]
7427 = pCpu->au32TSCHistory[6]
7428 = pCpu->au32TSCHistory[7]
7429 = (uint32_t)(_4G / pGip->u32UpdateHz);
7430}
7431
7432
7433/**
7434 * Initializes the GIP data.
7435 *
7436 * @param pDevExt Pointer to the device instance data.
7437 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7438 * @param HCPhys The physical address of the GIP.
7439 * @param u64NanoTS The current nanosecond timestamp.
7440 * @param uUpdateHz The update frequency.
7441 * @param uUpdateIntervalNS The update interval in nanoseconds.
7442 * @param cCpus The CPU count.
7443 */
7444static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7445 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7446{
7447 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7448 unsigned i;
7449#ifdef DEBUG_DARWIN_GIP
7450 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7451#else
7452 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7453#endif
7454
7455 /*
7456 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7457 * We only bother with TSC-deltas on invariant CPUs for now.
7458 */
7459 pGip->fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7460
7461 /*
7462 * Initialize the structure.
7463 */
7464 memset(pGip, 0, cbGip);
7465 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7466 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7467 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7468 pGip->cCpus = (uint16_t)cCpus;
7469 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7470 pGip->u32UpdateHz = uUpdateHz;
7471 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7472 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7473 RTCpuSetEmpty(&pGip->PresentCpuSet);
7474 RTMpGetSet(&pGip->PossibleCpuSet);
7475 pGip->cOnlineCpus = RTMpGetOnlineCount();
7476 pGip->cPresentCpus = RTMpGetPresentCount();
7477 pGip->cPossibleCpus = RTMpGetCount();
7478 pGip->idCpuMax = RTMpGetMaxCpuId();
7479 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7480 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7481 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7482 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7483
7484 for (i = 0; i < cCpus; i++)
7485 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7486
7487 /*
7488 * Link it to the device extension.
7489 */
7490 pDevExt->pGip = pGip;
7491 pDevExt->HCPhysGip = HCPhys;
7492 pDevExt->cGipUsers = 0;
7493}
7494
7495
7496/**
7497 * On CPU initialization callback for RTMpOnAll.
7498 *
7499 * @param idCpu The CPU ID.
7500 * @param pvUser1 The device extension.
7501 * @param pvUser2 The GIP.
7502 */
7503static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7504{
7505 /* This is good enough, even though it will update some of the globals a
7506 bit to much. */
7507 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7508}
7509
7510
7511/**
7512 * Invalidates the GIP data upon termination.
7513 *
7514 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7515 */
7516static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7517{
7518 unsigned i;
7519 pGip->u32Magic = 0;
7520 for (i = 0; i < pGip->cCpus; i++)
7521 {
7522 pGip->aCPUs[i].u64NanoTS = 0;
7523 pGip->aCPUs[i].u64TSC = 0;
7524 pGip->aCPUs[i].iTSCHistoryHead = 0;
7525 pGip->aCPUs[i].u64TSCSample = 0;
7526 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7527 }
7528}
7529
7530
/**
 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
 * updates all the per cpu data except the transaction id.
 *
 * The nanosecond timestamp and the TSC value are always refreshed.  Unless
 * the GIP is in invariant TSC mode, the routine additionally maintains the
 * 8 entry TSC interval history and derives the TSC update interval and the
 * CPU frequency from it.
 *
 * @param   pDevExt     The device extension.
 * @param   pGipCpu     Pointer to the per cpu data.
 * @param   u64NanoTS   The current time stamp.
 * @param   u64TSC      The current TSC.
 * @param   iTick       The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t    u64TSCDelta;
    uint32_t    u32UpdateIntervalTSC;
    uint32_t    u32UpdateIntervalTSCSlack;
    unsigned    iTSCHistoryHead;
    uint64_t    u64CpuHz;
    uint32_t    u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update (must be calculated before
       u64NanoTS is overwritten below). */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta, i.e. the number of TSC ticks since the previous update.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* We don't need to keep recalculating the frequency when it's invariant. */
    if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        return;

    /* A delta that doesn't fit into 32 bits is treated as an error: fall back
       on the previously calculated interval and count it. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * On the 2nd and 3rd callout, reset the history with the current TSC
     * interval since the values entered by supdrvGipInit are totally off.
     * The interval on the 1st callout is completely unreliable, the 2nd is a
     * bit better, while the 3rd should be most reliable.
     */
    u32TransactionId = pGipCpu->u32TransactionId;
    if (RT_UNLIKELY(   (   u32TransactionId == 5
                        || u32TransactionId == 7)
                    && (   iTick == 2
                        || iTick == 3) ))
    {
        unsigned i;
        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    }

    /*
     * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
     * Wait until we have at least one full history since the above history reset. The
     * assumption is that the majority of the previous history values will be tolerable.
     * See @bugref{6710} comment #67.
     *
     * When the last update interval is outside the tolerance, the measured
     * TSC delta is replaced by the average of the 8 history entries.
     */
    if (   u32TransactionId > 23 /* 7 + (8 * 2) */
        && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
        if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
            || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
        {
            /* Average the two history halves separately and then average the
               two results. */
            uint32_t u32;
            u32   = pGipCpu->au32TSCHistory[0];
            u32  += pGipCpu->au32TSCHistory[1];
            u32  += pGipCpu->au32TSCHistory[2];
            u32  += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u64TSCDelta   = pGipCpu->au32TSCHistory[4];
            u64TSCDelta  += pGipCpu->au32TSCHistory[5];
            u64TSCDelta  += pGipCpu->au32TSCHistory[6];
            u64TSCDelta  += pGipCpu->au32TSCHistory[7];
            u64TSCDelta >>= 2;
            u64TSCDelta  += u32;
            u64TSCDelta >>= 1;
        }
    }


    /*
     * TSC History: advance the head (wrapping at 8 entries) and store the
     * current interval there.
     */
    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     *
     * On Windows, we have an occasional (but recurring) sour value that messed up
     * the history but taking only 1 interval reduces the precision overall.
     * However, this problem existed before the invariant mode was introduced.
     *
     * NOTE(review): the invariant mode test below looks redundant because of
     * the early return for that mode further up - confirm before removing.
     */
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        || pGip->u32UpdateHz >= 1000)
    {
        /* Average of all 8 history entries. */
        uint32_t u32;
        u32   = pGipCpu->au32TSCHistory[0];
        u32  += pGipCpu->au32TSCHistory[1];
        u32  += pGipCpu->au32TSCHistory[2];
        u32  += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC   = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC  += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC  += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC  += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC  += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        /* Average of the current and the previous interval. */
        u32UpdateIntervalTSC   = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC  += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen on a 2GHz thinkpad running windows. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        /* Just the current interval. */
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    /* The published interval includes the slack. */
    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz = TSC ticks per update interval (without slack) scaled up to one second.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
    u64CpuHz /= pGip->u32UpdateIntervalNS;
    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
7686
7687
7688/**
7689 * Updates the GIP.
7690 *
7691 * @param pDevExt The device extension.
7692 * @param u64NanoTS The current nanosecond timesamp.
7693 * @param u64TSC The current TSC timesamp.
7694 * @param idCpu The CPU ID.
7695 * @param iTick The current timer tick.
7696 *
7697 * @remarks Can be called with interrupts disabled!
7698 */
7699static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7700{
7701 /*
7702 * Determine the relevant CPU data.
7703 */
7704 PSUPGIPCPU pGipCpu;
7705 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7706 AssertPtrReturnVoid(pGip);
7707
7708 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7709 pGipCpu = &pGip->aCPUs[0];
7710 else
7711 {
7712 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7713 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7714 return;
7715 pGipCpu = &pGip->aCPUs[iCpu];
7716 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7717 return;
7718 }
7719
7720 /*
7721 * Start update transaction.
7722 */
7723 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7724 {
7725 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7726 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7727 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7728 pGipCpu->cErrors++;
7729 return;
7730 }
7731
7732 /*
7733 * Recalc the update frequency every 0x800th time.
7734 */
7735 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7736 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7737 {
7738 if (pGip->u64NanoTSLastUpdateHz)
7739 {
7740#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7741 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7742 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7743 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7744 {
7745 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7746 * calculation on non-invariant hosts if it changes the history decision
7747 * taken in supdrvGipDoUpdateCpu(). */
7748 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7749 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7750 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7751 }
7752#endif
7753 }
7754 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7755 }
7756
7757 /*
7758 * Update the data.
7759 */
7760 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7761
7762 /*
7763 * Complete transaction.
7764 */
7765 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7766}
7767
7768
7769/**
7770 * Updates the per cpu GIP data for the calling cpu.
7771 *
7772 * @param pDevExt The device extension.
7773 * @param u64NanoTS The current nanosecond timesamp.
7774 * @param u64TSC The current TSC timesamp.
7775 * @param idCpu The CPU ID.
7776 * @param idApic The APIC id for the CPU index.
7777 * @param iTick The current timer tick.
7778 *
7779 * @remarks Can be called with interrupts disabled!
7780 */
7781static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7782 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7783{
7784 uint32_t iCpu;
7785 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7786
7787 /*
7788 * Avoid a potential race when a CPU online notification doesn't fire on
7789 * the onlined CPU but the tick creeps in before the event notification is
7790 * run.
7791 */
7792 if (RT_UNLIKELY(iTick == 1))
7793 {
7794 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7795 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7796 supdrvGipMpEventOnline(pDevExt, idCpu);
7797 }
7798
7799 iCpu = pGip->aiCpuFromApicId[idApic];
7800 if (RT_LIKELY(iCpu < pGip->cCpus))
7801 {
7802 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7803 if (pGipCpu->idCpu == idCpu)
7804 {
7805 /*
7806 * Start update transaction.
7807 */
7808 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7809 {
7810 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7811 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7812 pGipCpu->cErrors++;
7813 return;
7814 }
7815
7816 /*
7817 * Update the data.
7818 */
7819 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7820
7821 /*
7822 * Complete transaction.
7823 */
7824 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7825 }
7826 }
7827}
7828
7829
7830/**
7831 * Resume built-in keyboard on MacBook Air and Pro hosts.
7832 * If there is no built-in keyboard device, return success anyway.
7833 *
7834 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7835 */
7836static int supdrvIOCtl_ResumeSuspendedKbds(void)
7837{
7838#if defined(RT_OS_DARWIN)
7839 return supdrvDarwinResumeSuspendedKbds();
7840#else
7841 return VERR_NOT_IMPLEMENTED;
7842#endif
7843}
7844
7845
7846/**
7847 * Service a TSC-delta measurement request.
7848 *
7849 * @returns VBox status code.
7850 * @param pDevExt Pointer to the device instance data.
7851 * @param pReq Pointer to the TSC-delta measurement request.
7852 */
7853static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7854{
7855 PSUPGLOBALINFOPAGE pGip;
7856 RTCPUID idCpuWorker;
7857 int rc = VERR_CPU_NOT_FOUND;
7858 int16_t cTries;
7859 RTMSINTERVAL cMsWaitRetry;
7860 uint16_t iCpu;
7861
7862 /*
7863 * Validate.
7864 */
7865 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7866 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7867 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7868 idCpuWorker = pReq->u.In.idCpu;
7869 if (idCpuWorker == NIL_RTCPUID)
7870 return VERR_INVALID_CPU_ID;
7871
7872 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7873 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7874 pGip = pDevExt->pGip;
7875
7876 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
7877 return VINF_SUCCESS;
7878
7879 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7880 {
7881 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7882 if (pGipCpuWorker->idCpu == idCpuWorker)
7883 {
7884 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7885 && !pReq->u.In.fForce)
7886 return VINF_SUCCESS;
7887
7888#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7889 if (pReq->u.In.fAsync)
7890 {
7891 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7892 * to pass those options to the thread somehow and implement it in the
7893 * thread. Check if anyone uses/needs fAsync before implementing this. */
7894 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7895 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7896 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7897 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7898 {
7899 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7900 }
7901 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7902 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7903 return VINF_SUCCESS;
7904 }
7905#endif
7906
7907 while (cTries-- > 0)
7908 {
7909 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7910 if (RT_SUCCESS(rc))
7911 {
7912 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7913 break;
7914 }
7915
7916 if (cMsWaitRetry)
7917 RTThreadSleep(cMsWaitRetry);
7918 }
7919
7920 break;
7921 }
7922 }
7923 return rc;
7924}
7925
7926
7927/**
7928 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7929 *
7930 * @returns VBox status code.
7931 * @param pDevExt Pointer to the device instance data.
7932 * @param pReq Pointer to the TSC-read request.
7933 */
7934static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7935{
7936 uint64_t uTsc;
7937 uint16_t idApic;
7938 int16_t cTries;
7939 PSUPGLOBALINFOPAGE pGip;
7940 int rc;
7941
7942 /*
7943 * Validate.
7944 */
7945 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7946 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7947 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7948
7949 pGip = pDevExt->pGip;
7950 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7951
7952 cTries = 4;
7953 while (cTries-- > 0)
7954 {
7955 int rc2;
7956 uint16_t iCpu;
7957
7958 rc = SUPGetTsc(&uTsc, &idApic);
7959 if (RT_SUCCESS(rc))
7960 {
7961 pReq->u.Out.u64AdjustedTsc = uTsc;
7962 pReq->u.Out.idApic = idApic;
7963 return VINF_SUCCESS;
7964 }
7965
7966 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
7967 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7968 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7969 iCpu = pGip->aiCpuFromApicId[idApic];
7970 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7971
7972 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7973 if (RT_SUCCESS(rc2))
7974 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7975 }
7976
7977 return rc;
7978}
7979
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette