VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@107044

Last change on this file since 107044 was 106840, checked in by vboxsync, 3 weeks ago

SUP: Added SUPGIPGETCPU_TPIDRRO_EL0 for win.arm64, makes driver work on arm. jiraref:VBP-1253

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 188.4 KB
/* $Id: SUPDrvGip.cpp 106840 2024-11-05 21:33:53Z vboxsync $ */
/** @file
 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
 */

/*
 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
 * in the VirtualBox distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_SUP_DRV
#define SUPDRV_AGNOSTIC
#include "SUPDrvInternal.h"
#ifndef PAGE_SHIFT
# include <iprt/param.h>
#endif
#include <iprt/asm.h>
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
# include <iprt/asm-amd64-x86.h>
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
# include <iprt/asm-arm.h>
#else
# error "Port me!"
#endif
#include <iprt/asm-math.h>
#include <iprt/cpuset.h>
#include <iprt/handletable.h>
#include <iprt/mem.h>
#include <iprt/mp.h>
#include <iprt/power.h>
#include <iprt/process.h>
#include <iprt/semaphore.h>
#include <iprt/spinlock.h>
#include <iprt/thread.h>
#include <iprt/uuid.h>
#include <iprt/net.h>
#include <iprt/crc.h>
#include <iprt/string.h>
#include <iprt/timer.h>
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
# include <iprt/rand.h>
# include <iprt/path.h>
#endif
#include <iprt/uint128.h>
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
# include <iprt/x86.h>
#elif defined(RT_ARCH_ARM64)
# include <iprt/armv8.h>
#endif

#include <VBox/param.h>
#include <VBox/log.h>
#include <VBox/err.h>

#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
# include "dtrace/SUPDrv.h"
#else
/* ... */
#endif


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
 *
 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
 */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD                  UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS                 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS          4
/** The number of loops during which we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS       24

/** The TSC frequency refinement period in seconds.
 * The timer fires after 200ms, then every second, this value just says when
 * to stop it after that. */
#define GIP_TSC_REFINE_PERIOD_IN_SECS       12
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO    32
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO        448
/** The TSC delta value for the initial GIP master - 0 in regular builds.
 * To test the delta code this can be set to a non-zero value. */
#if 0
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
#else
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
#endif

AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
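/* A rough reading of the loop budget above (an editorial sketch based on the
   constant names, not normative): of the 96 total loops, the first 4 prime
   the cache, the minimum read time is tracked during the first 24, and the
   remaining loops supply the actual delta measurements. */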

/** @def VBOX_SVN_REV
 * The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif


/*********************************************************************************************************************************
*   Internal Functions                                                                                                           *
*********************************************************************************************************************************/
static DECLCALLBACK(void)   supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void)   supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static int                  supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
static void                 supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
static void                 supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
static int                  supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
static void                 supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
static void                 supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
#else
static int                  supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt);
static int                  supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
#endif


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
SUPR0_EXPORT_SYMBOL(g_pSUPGlobalInfoPage);



/*
 *
 * Misc Common GIP Code
 * Misc Common GIP Code
 * Misc Common GIP Code
 *
 *
 */


/**
 * Finds the GIP CPU index corresponding to @a idCpu.
 *
 * @returns GIP CPU array index, UINT32_MAX if not found.
 * @param   pGip    The GIP.
 * @param   idCpu   The CPU ID.
 */
static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
{
    uint32_t i;
    for (i = 0; i < pGip->cCpus; i++)
        if (pGip->aCPUs[i].idCpu == idCpu)
            return i;
    return UINT32_MAX;
}


/**
 * Gets the APIC ID using the best available method.
 *
 * @returns APIC ID.
 * @param   pGip    The GIP, for SUPGIPGETCPU_XXX.
 *
 * @note    APIC ID == CPU ID on non-x86 platforms.
 */
DECLINLINE(uint32_t) supdrvGipGetApicId(PSUPGLOBALINFOPAGE pGip)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_0B)
        return ASMGetApicIdExt0B();
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_8000001E)
        return ASMGetApicIdExt8000001E();
    return ASMGetApicId();

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    RT_NOREF(pGip);
    return (uint32_t)ASMGetThreadIdRoEL0();

#else
# error "port me"
#endif
}


/**
 * Gets the APIC ID using the best available method, slow version.
 *
 * @note    APIC ID == CPU ID on non-x86 platforms.
 */
static uint32_t supdrvGipGetApicIdSlow(void)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uint32_t const idApic = ASMGetApicId();

    /* The Intel CPU topology leaf: */
    uint32_t uOther = ASMCpuId_EAX(0);
    if (uOther >= UINT32_C(0xb) && RTX86IsValidStdRange(uOther))
    {
        uint32_t uEax = 0;
        uint32_t uEbx = 0;
        uint32_t uEcx = 0;
        uint32_t uEdx = 0;
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if ((uEcx >> 8) != 0) /* level type != invalid */
        {
            if ((uEdx & 0xff) == idApic)
                return uEdx;
            AssertMsgFailed(("ASMGetApicIdExt0B=>%#x idApic=%#x\n", uEdx, idApic));
        }
    }

    /* The AMD leaf: */
    uOther = ASMCpuId_EAX(UINT32_C(0x80000000));
    if (uOther >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uOther))
    {
        uOther = ASMGetApicIdExt8000001E();
        if ((uOther & 0xff) == idApic)
            return uOther;
        AssertMsgFailed(("ASMGetApicIdExt8000001E=>%#x idApic=%#x\n", uOther, idApic));
    }
    return idApic;

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    return (uint32_t)ASMGetThreadIdRoEL0();

#else
# error "port me"
#endif
}



/*
 *
 * GIP Mapping and Unmapping Related Code.
 * GIP Mapping and Unmapping Related Code.
 * GIP Mapping and Unmapping Related Code.
 *
 *
 */


/**
 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
 * updating.
 *
 * @param   pGipCpu     The per CPU structure for this CPU.
 * @param   u64NanoTS   The current time.
 */
static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    /*
     * Here we don't really care about applying the TSC delta. The re-initialization of this
     * value is not relevant especially while (re)starting the GIP as the first few ones will
     * be ignored anyway, see supdrvGipDoUpdateCpu().
     */
    pGipCpu->u64TSC    = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    pGipCpu->u64NanoTS = u64NanoTS;
}


/**
 * Set the current TSC and NanoTS value for the CPU.
 *
 * @param   idCpu       The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1     Pointer to the ring-0 GIP mapping.
 * @param   pvUser2     Pointer to the variable holding the current time.
 */
static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip   = (PSUPGLOBALINFOPAGE)pvUser1;
    uint32_t const     idApic = supdrvGipGetApicId(pGip);
    if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
    {
        unsigned const iCpu = pGip->aiCpuFromApicId[idApic];

        if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
            supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
        else
            LogRelMax(64, ("supdrvGipReInitCpuCallback: iCpu=%#x out of bounds (%#zx, idApic=%#x)\n",
                           iCpu, RT_ELEMENTS(pGip->aiCpuFromApicId), idApic));
    }
    else
        LogRelMax(64, ("supdrvGipReInitCpuCallback: idApic=%#x out of bounds (%#zx)\n",
                       idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)));

    NOREF(pvUser2);
}


/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that have been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile    bmApicId[4096 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile   fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile    idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;


/**
 * Checks for alternative ways of getting the CPU ID.
 *
 * This also checks the APIC ID, CPU ID and CPU set index values against the
 * GIP tables.
 *
 * @param   idCpu       The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1     Pointer to the state structure.
 * @param   pvUser2     Pointer to the GIP.
 */
static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVGIPDETECTGETCPU  pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
    PSUPGLOBALINFOPAGE      pGip   = (PSUPGLOBALINFOPAGE)pvUser2;
    int const               iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    uint32_t                fSupported = 0;
    uint32_t                idApic;
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uint32_t                uEax, uEbx, uEcx, uEdx;
#else
    uint32_t const          uEax = 0; /* Dummy for LogRel. */
#endif
    NOREF(pGip);

    AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */

#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /*
     * Check that the CPU ID and CPU set index are interchangeable.
     */
    if ((RTCPUID)iCpuSet == idCpu)
    {
        AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
        if (   iCpuSet >= 0
            && iCpuSet < RTCPUSET_MAX_CPUS
            && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
        {
            PSUPGIPCPU pGipCpu = SUPGetGipCpuBySetIndex(pGip, iCpuSet);

            /*
             * Check whether the IDTR.LIMIT contains a CPU number.
             */
# ifdef RT_ARCH_X86
            uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
# else
            uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
# endif
            RTIDTR Idtr;
            ASMGetIDTR(&Idtr);
            if (Idtr.cbIdt >= cbIdt)
            {
                uint32_t uTmp = Idtr.cbIdt - cbIdt;
                uTmp &= RTCPUSET_MAX_CPUS - 1;
                if (uTmp == idCpu)
                {
                    RTIDTR Idtr2;
                    ASMGetIDTR(&Idtr2);
                    if (Idtr2.cbIdt == Idtr.cbIdt)
                        fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
                }
            }

            /*
             * Check whether RDTSCP is an option.
             */
            if (ASMHasCpuId())
            {
                if (   RTX86IsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
                    && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
                {
                    uint32_t uAux;
                    ASMReadTscWithAux(&uAux);
                    if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                    {
                        ASMNopPause();
                        ASMReadTscWithAux(&uAux);
                        if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                            fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
                    }

                    if (pGipCpu)
                    {
                        uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
                        if (   (uAux & UINT16_MAX) == uGroupedAux
                            && pGipCpu->iCpuGroupMember <= UINT8_MAX)
                        {
                            ASMNopPause();
                            ASMReadTscWithAux(&uAux);
                            if ((uAux & UINT16_MAX) == uGroupedAux)
                                fSupported |= SUPGIPGETCPU_RDTSCP_GROUP_IN_CH_NUMBER_IN_CL;
                        }
                    }
                }
            }
        }
    }

    /*
     * Check for extended APIC ID methods.
     */
    idApic = UINT32_MAX;
    uEax = ASMCpuId_EAX(0);
    if (uEax >= UINT32_C(0xb) && RTX86IsValidStdRange(uEax))
    {
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if ((uEcx >> 8) != 0) /* level type != invalid */
        {
            if (RT_LIKELY(   uEdx < RT_ELEMENTS(pGip->aiCpuFromApicId)
                          && !ASMBitTest(pState->bmApicId, uEdx)))
            {
                if (uEdx == ASMGetApicIdExt0B())
                {
                    idApic = uEdx;
                    fSupported |= SUPGIPGETCPU_APIC_ID_EXT_0B;
                }
                else
                    AssertMsgFailed(("%#x vs %#x\n", uEdx, ASMGetApicIdExt0B()));
            }
        }
    }

    uEax = ASMCpuId_EAX(UINT32_C(0x80000000));
    if (uEax >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uEax))
    {
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(UINT32_C(0x8000001e), 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(UINT32_C(0x8000001e), 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if (uEax || uEbx || uEcx || uEdx)
        {
            if (RT_LIKELY(   uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
                          && (   idApic == UINT32_MAX
                              || idApic == uEax)
                          && !ASMBitTest(pState->bmApicId, uEax)))
            {
                if (uEax == ASMGetApicIdExt8000001E())
                {
                    idApic = uEax;
                    fSupported |= SUPGIPGETCPU_APIC_ID_EXT_8000001E;
                }
                else
                    AssertMsgFailed(("%#x vs %#x\n", uEax, ASMGetApicIdExt8000001E()));
            }
        }
    }

#else  /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
    fSupported |= SUPGIPGETCPU_TPIDRRO_EL0;
    idApic = supdrvGipGetApicIdSlow();
#endif /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */

    /*
     * Check that the APIC ID is unique.
     */
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uEax = ASMGetApicId();
    if (RT_LIKELY(   uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
                  && (   idApic == UINT32_MAX
                      || idApic == uEax)
                  && !ASMAtomicBitTestAndSet(pState->bmApicId, uEax)))
    {
        idApic = uEax;
        fSupported |= SUPGIPGETCPU_APIC_ID;
    }
    else
#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
    if (   idApic == UINT32_MAX
        || idApic >= RT_ELEMENTS(pGip->aiCpuFromApicId) /* paranoia */
        || ASMAtomicBitTestAndSet(pState->bmApicId, idApic))
    {
        AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x/%#x - duplicate APIC ID.\n",
                idCpu, iCpuSet, uEax, idApic));
    }

    /*
     * Check that the iCpuSet is within the expected range.
     */
    if (RT_UNLIKELY(   iCpuSet < 0
                    || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
                    || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
    {
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
                idCpu, iCpuSet, idApic));
    }
    else
    {
        RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
        if (RT_UNLIKELY(idCpu2 != idCpu))
        {
            ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
            LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
                    idCpu, iCpuSet, idApic, idCpu2));
        }
    }

    /*
     * Update the supported feature mask before we return.
     */
    ASMAtomicAndU32(&pState->fSupported, fSupported);

    NOREF(pvUser2);
}

/**
 * Increase the timer frequency on hosts where this is possible (NT).
 *
 * The idea is that more interrupts are better for us... Also, it's better
 * that we increase the timer frequency ourselves, because we might end up
 * getting inaccurate callbacks if someone else does it.
 *
 * @param   pDevExt     Sets u32SystemTimerGranularityGrant if increased.
 */
static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    if (pDevExt->u32SystemTimerGranularityGrant == 0)
    {
        uint32_t u32SystemResolution;
        if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(1000000 /* 1000 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(1953125 /*  512 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(2000000 /*  500 HZ */, &u32SystemResolution))
           )
        {
#if 0 /* def VBOX_STRICT - this somehow triggers bogus assertions on windows 10 */
            uint32_t u32After = RTTimerGetSystemGranularity();
            AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
#endif
            pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
        }
    }
}


/**
 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
 *
 * @param   pDevExt     Clears u32SystemTimerGranularityGrant.
 */
static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
        AssertRC(rc2);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}


/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this
 *          function counts globally as one reference. One call to SUPR0GipUnmap()
 *          will unmap the GIP and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, NIL_RTR0PROCESS);
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again. On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * Unless this is the first time the GIP is mapped, round each
                 * CPU's transaction ID up to the next update-frequency
                 * recalculation boundary and clear the last recalculation
                 * timestamp so the update frequency is recalculated afresh.
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-initialize the per-CPU TSC and NanoTS values, backdating
                 * NanoTS by one update interval.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    PSUPDRVGIPDETECTGETCPU pDetectState = (PSUPDRVGIPDETECTGETCPU)RTMemTmpAllocZ(sizeof(*pDetectState));
                    if (pDetectState)
                    {
                        pDetectState->fSupported   = UINT32_MAX;
                        pDetectState->idCpuProblem = NIL_RTCPUID;
                        rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, pDetectState, pGipR0);
                        if (pDetectState->idCpuProblem == NIL_RTCPUID)
                        {
                            if (   pDetectState->fSupported != UINT32_MAX
                                && pDetectState->fSupported != 0)
                            {
                                if (pGipR0->fGetGipCpu != pDetectState->fSupported)
                                {
                                    pGipR0->fGetGipCpu = pDetectState->fSupported;
                                    LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", pDetectState->fSupported));
                                }
                            }
                            else
                            {
                                LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                        pDetectState->fSupported));
                                rc = VERR_UNSUPPORTED_CPU;
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                    pDetectState->idCpuProblem, pDetectState->idCpuProblem));
                            rc = VERR_INVALID_CPU_ID;
                        }
                        RTMemTmpFree(pDetectState);
                    }
                    else
                        rc = VERR_NO_TMP_MEMORY;
                }

                /*
                 * Start the GIP timer if all is well...
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipMap);
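
/*
 * Usage sketch (illustrative only, not part of the driver source): a ring-0
 * client maps the GIP for its session and pairs the call with SUPR0GipUnmap()
 * when done.  Error handling is trimmed and the session pointer is assumed to
 * be valid.
 *
 * @code
 *      RTR3PTR  pGipR3    = NIL_RTR3PTR;
 *      RTHCPHYS HCPhysGip = NIL_RTHCPHYS;
 *      int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // ... use the ring-3 mapping and/or the physical address ...
 *          SUPR0GipUnmap(pSession);
 *      }
 * @endcode
 */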


/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * GIP test-mode session?
     */
    if (   pSession->fGipTestMode
        && pDevExt->pGip)
    {
        supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
        Assert(!pSession->fGipTestMode);
    }

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipUnmap);


/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL.
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    return g_pSUPGlobalInfoPage;
}
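
/*
 * Usage sketch (illustrative only): reading a couple of fields off the GIP.
 * Assumes the page has been initialized; checking u32Magic against
 * SUPGLOBALINFOPAGE_MAGIC is the usual defensive pattern for consumers.
 *
 * @code
 *      PSUPGLOBALINFOPAGE pGip = SUPGetGIP();
 *      if (pGip && pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC)
 *          SUPR0Printf("GIP: mode=%u cCpus=%u u64CpuHz=%llu\n",
 *                      pGip->u32Mode, pGip->cCpus, pGip->u64CpuHz);
 * @endcode
 */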




/*
 *
 *
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 *
 *
 */

/**
 * Used by supdrvGipInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
 * to update the TSC frequency related GIP variables.
 *
 * @param   pGip                The GIP.
 * @param   nsElapsed           The number of nanoseconds elapsed.
 * @param   cElapsedTscTicks    The corresponding number of TSC ticks.
 * @param   iTick               The tick number for debugging.
 */
static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
{
    /*
     * Calculate the frequency.
     */
    uint64_t uCpuHz;
    if (   cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
        && nsElapsed < UINT32_MAX)
        uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
    else
    {
        RTUINT128U CpuHz, Tmp, Divisor;
        CpuHz.s.Lo = CpuHz.s.Hi = 0;
        RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
        uCpuHz = CpuHz.s.Lo;
    }

    /*
     * Update the GIP.
     */
    ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);

        /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
        if (iTick + 1 < pGip->cCpus)
            ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
    }
}
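
/*
 * Worked example for the fast path above (illustrative numbers): with
 * nsElapsed = 2 000 000 000 (2 s) and cElapsedTscTicks = 5 800 000 000,
 * uCpuHz = 5 800 000 000 * RT_NS_1SEC / 2 000 000 000 = 2 900 000 000 Hz,
 * i.e. a 2.9 GHz TSC.  The 128-bit fallback is only needed once nsElapsed
 * exceeds UINT32_MAX (about 4.29 seconds) or cElapsedTscTicks reaches
 * UINT64_MAX / RT_NS_1SEC (roughly 1.8e10 ticks).
 */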


/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device instance data.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    RTCPUID             idCpu;
    uint64_t            cNsElapsed;
    uint64_t            cTscTicksElapsed;
    uint64_t            nsNow;
    uint64_t            uTsc;
    RTCCUINTREG         fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Read the TSC and time, noting which CPU we are on.
     *
     * Don't bother spinning until RTTimeSystemNanoTS changes, since on
     * systems where it matters we're in a context where we cannot waste that
     * much time (DPC watchdog, called from clock interrupt).
     */
    fEFlags = ASMIntDisableFlags();
    uTsc    = ASMReadTSC();
    nsNow   = RTTimeSystemNanoTS();
    idCpu   = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed          = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed    = uTsc  - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer. If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
        int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}


/**
 * @callback_method_impl{FNRTPOWERNOTIFICATION}
 */
static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
{
    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * If the TSC frequency refinement timer is running, we need to cancel it so it
     * doesn't screw up the frequency after a long suspend.
     *
     * Recalculate all TSC-deltas on host resume as they may have changed;
     * seen on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
     */
    if (enmEvent == RTPOWEREVENT_RESUME)
    {
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
        if (   RT_LIKELY(pGip)
            && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
            && !supdrvOSAreCpusOfflinedOnSuspend())
        {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
#else
            RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
            supdrvTscMeasureInitialDeltas(pDevExt);
#endif
        }
    }
    else if (enmEvent == RTPOWEREVENT_SUSPEND)
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
}

/**
 * Start the TSC-frequency refinement timer for the invariant TSC GIP mode.
 *
 * We cannot use this in the synchronous and asynchronous TSC GIP modes because
 * the CPU may change the TSC frequency between now and when the timer fires
 * (supdrvInitAsyncRefineTscTimer).
 *
 * @param   pDevExt     Pointer to the device instance data.
 */
static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
{
    uint64_t    u64NanoTS;
    RTCCUINTREG fEFlags;
    int         rc;

    /*
     * Register a power management callback.
     */
    pDevExt->fInvTscRefinePowerEvent = false;
    rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
    AssertRC(rc); /* ignore */

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement
     * of the TSC. We try to get as close to a clock tick as possible on
     * systems which do not provide high-resolution time.
     */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();

    fEFlags = ASMIntDisableFlags();
    pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
    pDevExt->nsStartInvarTscRefine   = RTTimeSystemNanoTS();
    pDevExt->idCpuInvarTscRefine     = RTMpCpuId();
    ASMSetFlags(fEFlags);

    /*
     * Create a timer that runs on the same CPU so we won't have a dependency
     * on the TSC-delta and can run in parallel to it. On systems that do not
     * implement CPU specific timers we'll apply deltas in the timer callback,
     * just like we do for CPUs going offline.
     *
     * The longer the refinement interval the better the accuracy, at least in
     * theory. If it's too long though, ring-3 may already be starting its
     * first VMs before we're done. On most systems we will be loading the
     * support driver during boot and VMs won't be started for a while yet,
     * it is really only a problem during development (especially with
     * on-demand driver starting on Windows).
     *
     * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
     * to calculate the frequency during driver loading, the timer is set
     * to fire after 200 ms the first time. It will then reschedule itself
     * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
     * reached or it notices that there is a user land client with GIP
     * mapped (we want a stable frequency for all VMs).
     */
    rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
                         RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
                         supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
        if (RT_SUCCESS(rc))
            return;
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
    }

    if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
    {
        rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
                             supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
        if (RT_SUCCESS(rc))
        {
            rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
            if (RT_SUCCESS(rc))
                return;
            RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        }
    }

    pDevExt->pInvarTscRefineTimer = NULL;
    OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
}


/**
 * @callback_method_impl{PFNRTMPWORKER,
 *      RTMpOnSpecific callback for reading TSC and time on the CPU we started
 *      the measurements on.}
 */
static DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    RTCCUINTREG fEFlags   = ASMIntDisableFlags();
    uint64_t   *puTscStop = (uint64_t *)pvUser1;
    uint64_t   *pnsStop   = (uint64_t *)pvUser2;
    RT_NOREF1(idCpu);

    *puTscStop = ASMReadTSC();
    *pnsStop   = RTTimeSystemNanoTS();

    ASMSetFlags(fEFlags);
}


/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the
 * nominal, maximum TSC frequency is under 'normal' CPU operation.
 *
 * @returns VBox status code.
 * @param   pGip        Pointer to the GIP.
 * @param   fRough      Set if we're doing the rough calculation that the
 *                      TSC measuring code needs, where accuracy isn't all
 *                      that important (too high is better than too low).
 *                      When clear we try for best accuracy that we can
 *                      achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    int      cTriesLeft  = fRough ? 4 : 2;
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t    nsStart;
        uint64_t    nsStop;
        uint64_t    uTscStart;
        uint64_t    uTscStop;
        RTCPUID     idCpuStart;
        RTCPUID     idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows version for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStart  = ASMReadTSC();
        nsStart    = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             */
            uint64_t msElapsed = 0;
            uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                             / RT_NS_1MS;
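            /* Illustrative numbers (an editorial example, not from the source):
               with nsTimerIncr = 15625000 (a 64 Hz system tick) and fRough
               clear, the 200 ms target is rounded up to 13 ticks (203125000
               ns), 100us is subtracted, and msDelay works out to 203 ms. */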
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop    = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up.
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStop  = ASMReadTSC();
        nsStop    = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes, things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once? */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs. That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
                    int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            uTscStart -= iStartTscDelta;
                            uTscStop  -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas. We retry, if we're on our last retry
                     * we do the cross call instead just to get a result. The
                     * frequency will be refined in a few seconds anyway.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad, as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas. So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy. So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try to read the TSC and timestamp on the start CPU.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    Assert(!fRough);
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}


/**
 * Finds our (@a idCpu) entry, or allocates a new one if not found.
 *
 * @returns Index of the CPU in the cache set.
 * @param   pGip    The GIP.
 * @param   idCpu   The CPU ID.
 */
static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
{
    uint32_t i, cTries;

    /*
     * ASSUMES that CPU IDs are constant.
     */
    for (i = 0; i < pGip->cCpus; i++)
        if (pGip->aCPUs[i].idCpu == idCpu)
            return i;

    cTries = 0;
    do
    {
        for (i = 0; i < pGip->cCpus; i++)
        {
            bool fRc;
            ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
            if (fRc)
                return i;
        }
    } while (cTries++ < 32);
    AssertReleaseFailed();
    return i - 1;
}


/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    PSUPGLOBALINFOPAGE  pGip      = pDevExt->pGip;
    int                 iCpuSet   = 0;
    uint32_t            idApic;
    uint32_t            i         = 0;
    uint64_t            u64NanoTS = 0;

    AssertPtrReturnVoid(pGip);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus,  RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);

    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);

    idApic = supdrvGipGetApicIdSlow();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic,  idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu,  idCpu);

    pGip->aCPUs[i].iCpuGroup = 0;
    pGip->aCPUs[i].iCpuGroupMember = iCpuSet;
#ifdef RT_OS_WINDOWS
    supdrvOSGipInitGroupBitsForCpu(pDevExt, pGip, &pGip->aCPUs[i]);
#endif

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
        ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    else
        LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: idApic=%#x is out of bounds (%#zx, i=%u, iCpuSet=%d)\n",
                       idApic, RT_ELEMENTS(pGip->aiCpuFromApicId), i, iCpuSet));
    if ((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
        ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
    else
        LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: iCpuSet=%d is out of bounds (%#zx, i=%u, idApic=%d)\n",
                       iCpuSet, RT_ELEMENTS(pGip->aiCpuFromApicId), i, idApic));

    /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
    RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Commit it. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}


/**
 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
 *
 * @param   idCpu       The CPU ID we are running on.
 * @param   pvUser1     Opaque pointer to the device instance data.
 * @param   pvUser2     Not used.
 */
static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
    NOREF(pvUser2);
    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
}


/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    int                 iCpuSet;
    unsigned            i;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Reset the TSC delta, we will recalculate it lazily. */
        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
        /* Remove this CPU from the set of CPUs for which we have obtained the TSC deltas. */
        RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    }

    /* Commit it. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}


/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU. It also updates the associated CPU data.
 *
 * @param   enmEvent    The event.
 * @param   idCpu       The cpu it applies to.
 * @param   pvUser      Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip    = pDevExt->pGip;

    if (pGip)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
            {
                RTThreadPreemptDisable(&PreemptState);
                if (idCpu == RTMpCpuId())
                {
                    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
                    RTThreadPreemptRestore(&PreemptState);
                }
                else
                {
                    RTThreadPreemptRestore(&PreemptState);
                    RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
                }

                /*
                 * Recompute TSC-delta for the newly online'd CPU.
                 */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
                    supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
#else
                    uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
                    supdrvTscMeasureDeltaOne(pDevExt, iCpu);
#endif
                }
                break;
            }

            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The GIP master is going offline, find a new one.
             */
            bool        fIgnored;
            unsigned    i;
            RTCPUID     idNewGipMaster = NIL_RTCPUID;
            RTCPUSET    OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
                {
                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                    if (idCurCpu != idGipMaster)
                    {
                        idNewGipMaster = idCurCpu;
                        break;
                    }
                }

            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}


/**
 * On CPU initialization callback for RTMpOnAll.
 *
 * @param   idCpu       The CPU ID.
 * @param   pvUser1     The device extension.
 * @param   pvUser2     The GIP.
 */
static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    /* This is good enough, even though it will update some of the globals a
       bit too much. */
1703 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1704 NOREF(pvUser2);
1705}
1706
1707
1708/**
1709 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1710 *
1711 * @param idCpu Ignored.
1712 * @param pvUser1 Where to put the TSC.
1713 * @param pvUser2 Ignored.
1714 */
1715static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1716{
1717 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1718 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1719 RT_NOREF2(idCpu, pvUser2);
1720}
1721
1722
1723/**
1724 * Determine if Async GIP mode is required because of TSC drift.
1725 *
1726 * When using the default/normal timer code it is essential that the time stamp counter
1727 * (TSC) never runs backwards, that is, a read of the counter should return
1728 * a bigger value than any previous read. This is guaranteed by the latest
1729 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1730 * case we have to choose the asynchronous timer mode.
1731 *
1732 * @param poffMin Pointer to the determined difference between different
1733 * cores (optional, can be NULL).
1734 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1735 */
1736static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1737{
1738 /*
1739 * Just iterate over all the CPUs 8 times and make sure that the TSC is
1740 * strictly increasing. We don't bother taking TSC rollover into account.
1741 */
1742 int iEndCpu = RTMpGetArraySize();
1743 int iCpu;
1744 int cLoops = 8;
1745 bool fAsync = false;
1746 int rc = VINF_SUCCESS;
1747 uint64_t offMax = 0;
1748 uint64_t offMin = ~(uint64_t)0;
1749 uint64_t PrevTsc = ASMReadTSC();
1750
1751 while (cLoops-- > 0)
1752 {
1753 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1754 {
1755 uint64_t CurTsc;
1756 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1757 &CurTsc, (void *)(uintptr_t)iCpu);
1758 if (RT_SUCCESS(rc))
1759 {
1760 if (CurTsc <= PrevTsc)
1761 {
1762 fAsync = true;
1763 offMin = offMax = PrevTsc - CurTsc;
1764 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1765 iCpu, cLoops, CurTsc, PrevTsc));
1766 break;
1767 }
1768
1769 /* Gather statistics (except the first time). */
1770 if (iCpu != 0 || cLoops != 7)
1771 {
1772 uint64_t off = CurTsc - PrevTsc;
1773 if (off < offMin)
1774 offMin = off;
1775 if (off > offMax)
1776 offMax = off;
1777 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1778 }
1779
1780 /* Next */
1781 PrevTsc = CurTsc;
1782 }
1783 else if (rc == VERR_NOT_SUPPORTED)
1784 break;
1785 else
1786 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1787 }
1788
1789 /* broke out of the loop. */
1790 if (iCpu < iEndCpu)
1791 break;
1792 }
1793
1794 if (poffMin)
1795 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1796 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1797 fAsync, iEndCpu, rc, offMin, offMax));
1798#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1799 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1800#endif
1801 return fAsync;
1802}
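/*
 * Worked example, illustration only: if the read cross-called on cpu0 above
 * returns PrevTsc = 1000000 and the following read on cpu1 returns
 * CurTsc = 999900, the CurTsc <= PrevTsc test fires and the TSCs are
 * declared async with offMin = offMax = 100. On a healthy system every
 * consecutive pair of reads instead yields a small, positive off value.
 */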
1803
1804
1805/**
1806 * supdrvGipInit() worker that determines the GIP TSC mode.
1807 *
1808 * @returns The most suitable TSC mode.
1809 * @param pDevExt Pointer to the device instance data.
1810 */
1811static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1812{
1813#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1814 uint64_t u64DiffCoresIgnored;
1815 uint32_t uEAX, uEBX, uECX, uEDX;
1816
1817 /*
1818 * Establish whether the CPU advertises TSC as invariant, we need that in
1819 * a couple of places below.
1820 */
1821 bool fInvariantTsc = false;
1822 if (ASMHasCpuId())
1823 {
1824 uEAX = ASMCpuId_EAX(0x80000000);
1825 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1826 {
1827 uEDX = ASMCpuId_EDX(0x80000007);
1828 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1829 fInvariantTsc = true;
1830 }
1831 }
1832
1833 /*
1834 * On single CPU systems, we don't need to consider ASYNC mode.
1835 */
1836 if (RTMpGetCount() <= 1)
1837 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1838
1839 /*
1840 * Allow the user and/or OS specific bits to force async mode.
1841 */
1842 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1843 return SUPGIPMODE_ASYNC_TSC;
1844
1845 /*
1846 * Use invariant mode if the CPU says TSC is invariant.
1847 */
1848 if (fInvariantTsc)
1849 return SUPGIPMODE_INVARIANT_TSC;
1850
1851 /*
1852 * The TSC is not invariant and we're on SMP, which presents two problems:
1853 *
1854 * (1) There might be a skew between the CPUs, so that cpu0
1855 * returns a TSC that is slightly different from cpu1.
1856 * This skew may be due to (2), bad TSC initialization
1857 * or slightly different TSC rates.
1858 *
1859 * (2) Power management (and other things) may cause the TSC
1860 * to run at a non-constant speed, and cause the speed
1861 * to differ between the CPUs. This will result in (1).
1862 *
1863 * If any of the above is detected, we will have to use ASYNC mode.
1864 */
1865 /* (1) Try to check for current differences between the CPUs. */
1866 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1867 return SUPGIPMODE_ASYNC_TSC;
1868
1869 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1870 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1871 if ( RTX86IsValidStdRange(uEAX)
1872 && (RTX86IsAmdCpu(uEBX, uECX, uEDX) || RTX86IsHygonCpu(uEBX, uECX, uEDX)) )
1873 {
1874 /* Check for APM support. */
1875 uEAX = ASMCpuId_EAX(0x80000000);
1876 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1877 {
1878 uEDX = ASMCpuId_EDX(0x80000007);
1879 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1880 return SUPGIPMODE_ASYNC_TSC;
1881 }
1882 }
1883
1884 return SUPGIPMODE_SYNC_TSC;
1885
1886#elif defined(RT_ARCH_ARM64)
1887 RT_NOREF(pDevExt);
1888 return SUPGIPMODE_INVARIANT_TSC;
1889
1890#else
1891# error "Port me"
1892#endif
1893}
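/*
 * Summary of the decision tree above for x86/amd64 (in evaluation order):
 *
 *      Single CPU, invariant TSC           -> SUPGIPMODE_INVARIANT_TSC
 *      Single CPU, no invariant TSC        -> SUPGIPMODE_SYNC_TSC
 *      OS/user forces async (SMP)          -> SUPGIPMODE_ASYNC_TSC
 *      Invariant TSC (SMP)                 -> SUPGIPMODE_INVARIANT_TSC
 *      Measurable TSC differences (SMP)    -> SUPGIPMODE_ASYNC_TSC
 *      AMD/Hygon with APM features (SMP)   -> SUPGIPMODE_ASYNC_TSC
 *      Otherwise                           -> SUPGIPMODE_SYNC_TSC
 *
 * On arm64 the architecture mandates a constant-frequency system counter,
 * hence the unconditional SUPGIPMODE_INVARIANT_TSC.
 */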
1894
1895
1896/**
1897 * Initializes per-CPU GIP information.
1898 *
1899 * @param pGip Pointer to the GIP.
1900 * @param pCpu Pointer to which GIP CPU to initialize.
1901 * @param u64NanoTS The current nanosecond timestamp.
1902 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1903 */
1904static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1905{
1906 pCpu->u32TransactionId = 2;
1907 pCpu->u64NanoTS = u64NanoTS;
1908 pCpu->u64TSC = ASMReadTSC();
1909 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1910 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1911
1912 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1913 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1914 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1915 ASMAtomicWriteU16(&pCpu->iCpuGroup, 0);
1916 ASMAtomicWriteU16(&pCpu->iCpuGroupMember, UINT16_MAX);
1917 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1918 ASMAtomicWriteU32(&pCpu->iReservedForNumaNode, 0);
1919
1920 /*
1921 * The first time we're called, we don't have a CPU frequency handy,
1922 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1923 * called again and at that point we have a more plausible CPU frequency
1924 * value handy. The frequency history will also be adjusted again on
1925 * the 2nd timer callout (maybe we can skip that now?).
1926 */
1927 if (!uCpuHz)
1928 {
1929 pCpu->u64CpuHz = _4G - 1;
1930 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1931 }
1932 else
1933 {
1934 pCpu->u64CpuHz = uCpuHz;
1935 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1936 }
1937 pCpu->au32TSCHistory[0]
1938 = pCpu->au32TSCHistory[1]
1939 = pCpu->au32TSCHistory[2]
1940 = pCpu->au32TSCHistory[3]
1941 = pCpu->au32TSCHistory[4]
1942 = pCpu->au32TSCHistory[5]
1943 = pCpu->au32TSCHistory[6]
1944 = pCpu->au32TSCHistory[7]
1945 = pCpu->u32UpdateIntervalTSC;
1946}
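/*
 * Worked example, illustration only: with the pretended _4G - 1 Hz frequency
 * and a representative update frequency of 100 Hz, the initial
 * u32UpdateIntervalTSC and all eight au32TSCHistory entries become
 * 4294967295 / 100 = 42949672 ticks per update interval. Once a real
 * frequency is known, say 3 GHz at the same 100 Hz, the value becomes
 * 30000000.
 */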
1947
1948
1949/**
1950 * Initializes the GIP data.
1951 *
1952 * @returns VBox status code.
1953 * @param pDevExt Pointer to the device instance data.
1954 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1955 * @param HCPhys The physical address of the GIP.
1956 * @param u64NanoTS The current nanosecond timestamp.
1957 * @param uUpdateHz The update frequency.
1958 * @param uUpdateIntervalNS The update interval in nanoseconds.
1959 * @param cCpus The CPU count.
1960 * @param cbGipCpuGroups The supdrvOSGipGetGroupTableSize return value we
1961 * used when allocating the GIP structure.
1962 */
1963static int supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1964 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS,
1965 unsigned cCpus, size_t cbGipCpuGroups)
1966{
1967 size_t const cbGip = RT_ALIGN_Z(RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups, PAGE_SIZE);
1968 unsigned i;
1969#ifdef DEBUG_DARWIN_GIP
1970 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1971#else
1972 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1973#endif
1974
1975 /*
1976 * Initialize the structure.
1977 */
1978 memset(pGip, 0, cbGip);
1979
1980 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1981 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1982 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1983 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1984 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1985 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1986 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1987 else
1988 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1989 pGip->cCpus = (uint16_t)cCpus;
1990 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1991 pGip->u32UpdateHz = uUpdateHz;
1992 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1993 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1994 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1995 RTCpuSetEmpty(&pGip->PresentCpuSet);
1996 RTMpGetSet(&pGip->PossibleCpuSet);
1997 pGip->cOnlineCpus = RTMpGetOnlineCount();
1998 pGip->cPresentCpus = RTMpGetPresentCount();
1999 pGip->cPossibleCpus = RTMpGetCount();
2000 pGip->cPossibleCpuGroups = 1;
2001 pGip->idCpuMax = RTMpGetMaxCpuId();
2002 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
2003 pGip->aiCpuFromApicId[i] = UINT16_MAX;
2004 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
2005 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
2006 for (i = 0; i < RT_ELEMENTS(pGip->aoffCpuGroup); i++)
2007 pGip->aoffCpuGroup[i] = UINT32_MAX;
2008 for (i = 0; i < cCpus; i++)
2009 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
2010#ifdef RT_OS_WINDOWS
2011 int rc = supdrvOSInitGipGroupTable(pDevExt, pGip, cbGipCpuGroups);
2012 AssertRCReturn(rc, rc);
2013#endif
2014
2015 /*
2016 * Link it to the device extension.
2017 */
2018 pDevExt->pGip = pGip;
2019 pDevExt->HCPhysGip = HCPhys;
2020 pDevExt->cGipUsers = 0;
2021
2022 return VINF_SUCCESS;
2023}
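/*
 * Sizing note with made-up numbers, illustration only: cbGip covers the GIP
 * header, the aCPUs[cCpus] tail array and the OS specific CPU group table,
 * rounded up to whole pages. Assuming a 4 KiB PAGE_SIZE and a hypothetical
 * 9 KiB of header + per-CPU data with no group table, the allocation rounds
 * up to cbGip = 12 KiB and thus cPages = 3.
 */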
2024
2025
2026/**
2027 * Creates the GIP.
2028 *
2029 * @returns VBox status code.
2030 * @param pDevExt Instance data. GIP stuff may be updated.
2031 */
2032int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
2033{
2034 PSUPGLOBALINFOPAGE pGip;
2035 size_t cbGip;
2036 size_t cbGipCpuGroups;
2037 RTHCPHYS HCPhysGip;
2038 uint32_t u32SystemResolution;
2039 uint32_t u32Interval;
2040 uint32_t u32MinInterval;
2041 uint32_t uMod;
2042 unsigned cCpus;
2043 int rc;
2044
2045 LogFlow(("supdrvGipCreate:\n"));
2046
2047 /*
2048 * Assert order.
2049 */
2050 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
2051 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
2052 Assert(!pDevExt->pGipTimer);
2053#ifdef SUPDRV_USE_MUTEX_FOR_GIP
2054 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
2055 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
2056#else
2057 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
2058 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
2059#endif
2060
2061 /*
2062 * Check the CPU count.
2063 */
2064 cCpus = RTMpGetArraySize();
2065 if (cCpus > RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)))
2066 {
2067 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)));
2068 return VERR_TOO_MANY_CPUS;
2069 }
2070
2071 /*
2072 * Allocate a contiguous set of pages with a default kernel mapping.
2073 */
2074#ifdef RT_OS_WINDOWS
2075 cbGipCpuGroups = supdrvOSGipGetGroupTableSize(pDevExt);
2076#else
2077 cbGipCpuGroups = 0;
2078#endif
2079 cbGip = RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups;
2080 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, cbGip, NIL_RTHCPHYS /*PhysHighest*/, false /*fExecutable*/);
2081 if (RT_FAILURE(rc))
2082 {
2083 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
2084 return rc;
2085 }
2086 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
2087 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
2088
2089 /*
2090 * Find a reasonable update interval and initialize the structure.
2091 */
2092 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
2093 /** @todo figure out why using a 100ms interval upsets timekeeping in VMs.
2094 * See @bugref{6710}. */
2095 u32MinInterval = RT_NS_10MS;
2096 u32SystemResolution = RTTimerGetSystemGranularity();
2097 u32Interval = u32MinInterval;
2098 uMod = u32MinInterval % u32SystemResolution;
2099 if (uMod)
2100 u32Interval += u32SystemResolution - uMod;
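 /*
 * Worked example, illustration only: with u32MinInterval = 10000000 ns and
 * a hypothetical system granularity of 4000000 ns, uMod = 2000000 and the
 * interval is rounded up to the next multiple of the granularity,
 * 12000000 ns, which yields an update frequency of roughly 83 Hz.
 */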
2101
2102 rc = supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval,
2103 cCpus, cbGipCpuGroups);
2104
2105 /*
2106 * Important sanity check... (Sets rc)
2107 */
2108 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
2109 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
2110 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
2111 {
2112 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
2113 rc = VERR_INTERNAL_ERROR_2;
2114 }
2115
2116 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
2117 AssertStmt( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
2118 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED,
2119 rc = VERR_INTERNAL_ERROR_3);
2120
2121 /*
2122 * Do the TSC frequency measurements.
2123 *
2124 * If we're in invariant TSC mode, just do a quick preliminary measurement
2125 * that the TSC-delta measurement code can use to yield cross calls.
2126 *
2127 * If we're in any of the other two modes, neither of which requires MP init,
2128 * notifications or deltas for the job, do the full measurement now so
2129 * that supdrvGipInitOnCpu() can populate the TSC interval and history
2130 * array with more reasonable values.
2131 */
2132 if (RT_SUCCESS(rc))
2133 {
2134 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
2135 {
2136 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
2137 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
2138 }
2139 else
2140 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
2141 if (RT_SUCCESS(rc))
2142 {
2143 /*
2144 * Start the TSC-delta measurement thread before we start getting MP
2145 * events that will try to kick it into action (includes the
2146 * RTMpOnAll/supdrvGipInitOnCpu call below).
2147 */
2148 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
2149 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
2150#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2151 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2152 rc = supdrvTscDeltaThreadInit(pDevExt);
2153#endif
2154 if (RT_SUCCESS(rc))
2155 {
2156 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
2157 if (RT_SUCCESS(rc))
2158 {
2159 /*
2160 * Do GIP initialization on all online CPUs. Wake up the
2161 * TSC-delta thread afterwards.
2162 */
2163 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
2164 if (RT_SUCCESS(rc))
2165 {
2166#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2167 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
2168#else
2169 uint16_t iCpu;
2170 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2171 {
2172 /*
2173 * Measure the TSC deltas now that we have MP notifications.
2174 */
2175 int cTries = 5;
2176 do
2177 {
2178 rc = supdrvTscMeasureInitialDeltas(pDevExt);
2179 if ( rc != VERR_TRY_AGAIN
2180 && rc != VERR_CPU_OFFLINE)
2181 break;
2182 } while (--cTries > 0);
2183 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2184 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
2185 }
2186 else
2187 {
2188 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2189 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
2190 }
2191 if (RT_SUCCESS(rc))
2192#endif
2193 {
2194 /*
2195 * Create the timer.
2196 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
2197 */
2198 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
2199 {
2200 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
2201 supdrvGipAsyncTimer, pDevExt);
2202 if (rc == VERR_NOT_SUPPORTED)
2203 {
2204 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
2205 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
2206 }
2207 }
2208 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2209 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
2210 supdrvGipSyncAndInvariantTimer, pDevExt);
2211 if (RT_SUCCESS(rc))
2212 {
2213 /*
2214 * We're good.
2215 */
2216 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
2217 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2218
2219 g_pSUPGlobalInfoPage = pGip;
2220 return VINF_SUCCESS;
2221 }
2222
2223 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
2224 Assert(!pDevExt->pGipTimer);
2225 }
2226 }
2227 else
2228 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
2229 }
2230 else
2231 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
2232 }
2233 else
2234 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
2235 }
2236 else
2237 OSDBGPRINT(("supdrvGipCreate: supdrvTscMeasureInitialDeltas failed. rc=%Rrc\n", rc));
2238 }
2239
2240 /* Releases timer frequency increase too. */
2241 supdrvGipDestroy(pDevExt);
2242 return rc;
2243}
2244
2245
2246/**
2247 * Invalidates the GIP data upon termination.
2248 *
2249 * @param pGip Pointer to the read-write kernel mapping of the GIP.
2250 */
2251static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2252{
2253 unsigned i;
2254 pGip->u32Magic = 0;
2255 for (i = 0; i < pGip->cCpus; i++)
2256 {
2257 pGip->aCPUs[i].u64NanoTS = 0;
2258 pGip->aCPUs[i].u64TSC = 0;
2259 pGip->aCPUs[i].iTSCHistoryHead = 0;
2260 pGip->aCPUs[i].u64TSCSample = 0;
2261 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2262 }
2263}
2264
2265
2266/**
2267 * Terminates the GIP.
2268 *
2269 * @param pDevExt Instance data. GIP stuff may be updated.
2270 */
2271void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2272{
2273 int rc;
2274#ifdef DEBUG_DARWIN_GIP
2275 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2276 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2277 pDevExt->pGipTimer, pDevExt->GipMemObj));
2278#endif
2279
2280 /*
2281 * Stop receiving MP notifications before tearing anything else down.
2282 */
2283 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2284
2285#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2286 /*
2287 * Terminate the TSC-delta measurement thread and resources.
2288 */
2289 supdrvTscDeltaTerm(pDevExt);
2290#endif
2291
2292 /*
2293 * Destroy the TSC-refinement timer.
2294 */
2295 if (pDevExt->pInvarTscRefineTimer)
2296 {
2297 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2298 pDevExt->pInvarTscRefineTimer = NULL;
2299 }
2300
2301 /*
2302 * Invalidate the GIP data.
2303 */
2304 if (pDevExt->pGip)
2305 {
2306 supdrvGipTerm(pDevExt->pGip);
2307 pDevExt->pGip = NULL;
2308 }
2309 g_pSUPGlobalInfoPage = NULL;
2310
2311 /*
2312 * Destroy the timer and free the GIP memory object.
2313 */
2314 if (pDevExt->pGipTimer)
2315 {
2316 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2317 pDevExt->pGipTimer = NULL;
2318 }
2319
2320 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2321 {
2322 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2323 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2324 }
2325
2326 /*
2327 * Finally, make sure we've released the system timer resolution request
2328 * if one actually succeeded and is still pending.
2329 */
2330 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2331}
2332
2333
2334
2335
2336/*
2337 *
2338 *
2339 * GIP Update Timer Related Code
2340 * GIP Update Timer Related Code
2341 * GIP Update Timer Related Code
2342 *
2343 *
2344 */
2345
2346
2347/**
2348 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2349 * updates all the per cpu data except the transaction id.
2350 *
2351 * @param pDevExt The device extension.
2352 * @param pGipCpu Pointer to the per cpu data.
2353 * @param u64NanoTS The current time stamp.
2354 * @param u64TSC The current TSC.
2355 * @param iTick The current timer tick.
2356 *
2357 * @remarks Can be called with interrupts disabled!
2358 */
2359static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2360{
2361 uint64_t u64TSCDelta;
2362 bool fUpdateCpuHz;
2363 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2364 AssertPtrReturnVoid(pGip);
2365
2366 /* Delta between this and the previous update. */
2367 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2368
2369 /*
2370 * Update the NanoTS.
2371 */
2372 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2373
2374 /*
2375 * Calc TSC delta.
2376 */
2377 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2378 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2379
2380 /*
2381 * Determine if we need to update the CPU (TSC) frequency calculation.
2382 *
2383 * We don't need to keep recalculating the frequency when it's invariant,
2384 * unless the special tstGIP-2 testing mode is enabled.
2385 */
2386 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2387 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2388 { /* likely*/ }
2389 else
2390 {
2391 uint32_t fGipFlags = pGip->fFlags;
2392 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2393 {
2394 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2395 {
2396 /* Cache the TSC frequency before forcing updates due to test mode. */
2397 if (!fUpdateCpuHz)
2398 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2399 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2400 }
2401 fUpdateCpuHz = true;
2402 }
2403 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2404 {
2405 /* Restore the cached TSC frequency if any. */
2406 if (!fUpdateCpuHz)
2407 {
2408 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2409 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2410 }
2411 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2412 }
2413 }
2414
2415 /*
2416 * Calculate the CPU (TSC) frequency if necessary.
2417 */
2418 if (fUpdateCpuHz)
2419 {
2420 uint64_t u64CpuHz;
2421 uint32_t u32UpdateIntervalTSC;
2422 uint32_t u32UpdateIntervalTSCSlack;
2423 uint32_t u32TransactionId;
2424 unsigned iTSCHistoryHead;
2425
2426 if (u64TSCDelta >> 32)
2427 {
2428 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2429 pGipCpu->cErrors++;
2430 }
2431
2432 /*
2433 * On the 2nd and 3rd callout, reset the history with the current TSC
2434 * interval since the values entered by supdrvGipInit are totally off.
2435 * The interval on the 1st callout is completely unreliable, the 2nd is a bit
2436 * better, while the 3rd should be the most reliable.
2437 */
2438 /** @todo Could we drop this now that we initialize the history
2439 * with nominal TSC frequency values? */
2440 u32TransactionId = pGipCpu->u32TransactionId;
2441 if (RT_UNLIKELY( ( u32TransactionId == 5
2442 || u32TransactionId == 7)
2443 && ( iTick == 2
2444 || iTick == 3) ))
2445 {
2446 unsigned i;
2447 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2448 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2449 }
2450
2451 /*
2452 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2453 * Wait until we have at least one full history since the above history reset. The
2454 * assumption is that the majority of the previous history values will be tolerable.
2455 * See @bugref{6710#c67}.
2456 */
2457 /** @todo Could we drop the fudging there now that we initialize the history
2458 * with nominal TSC frequency values? */
2459 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2460 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2461 {
2462 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2463 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2464 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2465 {
2466 uint32_t u32;
2467 u32 = pGipCpu->au32TSCHistory[0];
2468 u32 += pGipCpu->au32TSCHistory[1];
2469 u32 += pGipCpu->au32TSCHistory[2];
2470 u32 += pGipCpu->au32TSCHistory[3];
2471 u32 >>= 2;
2472 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2473 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2474 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2475 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2476 u64TSCDelta >>= 2;
2477 u64TSCDelta += u32;
2478 u64TSCDelta >>= 1;
2479 }
2480 }
2481
2482 /*
2483 * TSC History.
2484 */
2485 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2486 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2487 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2488 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2489
2490 /*
2491 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2492 *
2493 * On Windows, we have an occasional (but recurring) sour value that messes up
2494 * the history, but taking only 1 interval reduces the precision overall.
2495 */
2496 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2497 || pGip->u32UpdateHz >= 1000)
2498 {
2499 uint32_t u32;
2500 u32 = pGipCpu->au32TSCHistory[0];
2501 u32 += pGipCpu->au32TSCHistory[1];
2502 u32 += pGipCpu->au32TSCHistory[2];
2503 u32 += pGipCpu->au32TSCHistory[3];
2504 u32 >>= 2;
2505 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2506 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2507 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2508 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2509 u32UpdateIntervalTSC >>= 2;
2510 u32UpdateIntervalTSC += u32;
2511 u32UpdateIntervalTSC >>= 1;
2512
2513 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2514 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2515 }
2516 else if (pGip->u32UpdateHz >= 90)
2517 {
2518 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2519 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2520 u32UpdateIntervalTSC >>= 1;
2521
2522 /* value chosen on a 2GHz thinkpad running windows */
2523 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2524 }
2525 else
2526 {
2527 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2528
2529 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
2530 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2531 }
2532 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2533
2534 /*
2535 * CpuHz.
2536 */
2537 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2538 u64CpuHz /= pGip->u32UpdateIntervalNS;
2539 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2540 }
2541}
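/*
 * Worked example for the CpuHz calculation above, illustration only: with
 * u32UpdateIntervalTSC = 30000000 ticks measured over an update interval of
 * u32UpdateIntervalNS = 10000000 ns, we get
 * 30000000 * RT_NS_1SEC / 10000000 = 3000000000 Hz, i.e. a 3 GHz CPU.
 */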
2542
2543
2544/**
2545 * Updates the GIP.
2546 *
2547 * @param pDevExt The device extension.
2548 * @param u64NanoTS The current nanosecond timestamp.
2549 * @param u64TSC The current TSC timestamp.
2550 * @param idCpu The CPU ID.
2551 * @param iTick The current timer tick.
2552 *
2553 * @remarks Can be called with interrupts disabled!
2554 */
2555static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2556{
2557 /*
2558 * Determine the relevant CPU data.
2559 */
2560 PSUPGIPCPU pGipCpu;
2561 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2562 AssertPtrReturnVoid(pGip);
2563
2564 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2565 pGipCpu = &pGip->aCPUs[0];
2566 else
2567 {
2568 unsigned iCpu;
2569 uint32_t idApic = supdrvGipGetApicId(pGip);
2570 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
2571 { /* likely */ }
2572 else
2573 return;
2574 iCpu = pGip->aiCpuFromApicId[idApic];
2575 if (RT_LIKELY(iCpu < pGip->cCpus))
2576 { /* likely */ }
2577 else
2578 return;
2579 pGipCpu = &pGip->aCPUs[iCpu];
2580 if (RT_LIKELY(pGipCpu->idCpu == idCpu))
2581 { /* likely */ }
2582 else
2583 return;
2584 }
2585
2586 /*
2587 * Start update transaction.
2588 */
2589 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2590 {
2591 /* This can happen on win32 if we're taking too long and there are more CPUs around. It shouldn't happen though. */
2592 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2593 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2594 pGipCpu->cErrors++;
2595 return;
2596 }
2597
2598 /*
2599 * Recalc the update frequency every 0x800th time.
2600 */
2601 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2602 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2603 {
2604 if (pGip->u64NanoTSLastUpdateHz)
2605 {
2606#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2607 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2608 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2609 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2610 {
2611 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2612 * calculation on non-invariant hosts if it changes the history decision
2613 * taken in supdrvGipDoUpdateCpu(). */
2614 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2615 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2616 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2617 }
2618#endif
2619 }
2620 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2621 }
2622
2623 /*
2624 * Update the data.
2625 */
2626 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2627
2628 /*
2629 * Complete transaction.
2630 */
2631 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2632}
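/*
 * Illustration only: the odd/even u32TransactionId handling above is the
 * writer side of a sequence lock. A minimal reader-side sketch, using a
 * simplified stand-in type rather than the real GIP accessors, could look
 * like this (memory barrier subtleties elided for brevity):
 */
#if 0 /* illustrative sketch, not built */
/** Hypothetical, simplified stand-in for the per-CPU GIP data. */
typedef struct GIPCPUREADERSKETCH
{
    uint32_t volatile u32TransactionId;
    uint64_t          u64NanoTS;
    uint64_t          u64TSC;
} GIPCPUREADERSKETCH;

/** Retry until we get a snapshot with an even, unchanged transaction id. */
static void sketchReadGipCpu(GIPCPUREADERSKETCH *pCpu, uint64_t *pu64NanoTS, uint64_t *pu64TSC)
{
    uint32_t u32Id;
    do
    {
        /* Wait out any update in progress (odd id). */
        while ((u32Id = ASMAtomicReadU32(&pCpu->u32TransactionId)) & 1)
            ASMNopPause();
        *pu64NanoTS = pCpu->u64NanoTS;
        *pu64TSC    = pCpu->u64TSC;
        /* If the writer bumped the id while we were reading, do it again. */
    } while (ASMAtomicReadU32(&pCpu->u32TransactionId) != u32Id);
}
#endif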
2633
2634
2635/**
2636 * Updates the per cpu GIP data for the calling cpu.
2637 *
2638 * @param pDevExt The device extension.
2639 * @param u64NanoTS The current nanosecond timestamp.
2640 * @param u64TSC The current TSC timestamp.
2641 * @param idCpu The CPU ID.
2642 * @param idApic The APIC id for the CPU index.
2643 * @param iTick The current timer tick.
2644 *
2645 * @remarks Can be called with interrupts disabled!
2646 */
2647static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2648 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2649{
2650 uint32_t iCpu;
2651 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2652
2653 /*
2654 * Avoid a potential race when a CPU online notification doesn't fire on
2655 * the onlined CPU but the tick creeps in before the event notification is
2656 * run.
2657 */
2658 if (RT_LIKELY(iTick != 1))
2659 { /* likely*/ }
2660 else
2661 {
2662 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2663 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2664 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2665 }
2666
2667 iCpu = pGip->aiCpuFromApicId[idApic];
2668 if (RT_LIKELY(iCpu < pGip->cCpus))
2669 {
2670 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2671 if (pGipCpu->idCpu == idCpu)
2672 {
2673 /*
2674 * Start update transaction.
2675 */
2676 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2677 {
2678 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2679 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2680 pGipCpu->cErrors++;
2681 return;
2682 }
2683
2684 /*
2685 * Update the data.
2686 */
2687 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2688
2689 /*
2690 * Complete transaction.
2691 */
2692 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2693 }
2694 }
2695}
2696
2697
2698/**
2699 * Timer callback function for the sync and invariant GIP modes.
2700 *
2701 * @param pTimer The timer.
2702 * @param pvUser Opaque pointer to the device extension.
2703 * @param iTick The timer tick.
2704 */
2705static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2706{
2707 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2708 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2709 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2710 uint64_t u64TSC = ASMReadTSC();
2711 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2712 RT_NOREF1(pTimer);
2713
2714 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2715 {
2716 /*
2717 * The calculations in supdrvGipUpdate() are somewhat timing sensitive;
2718 * missing timer ticks is not an option for the GIP because the GIP users
2719 * will end up incrementing the time in 1ns per time getter call until
2720 * there is a complete timer update. So, if the delta has yet to be
2721 * calculated, we just pretend it is zero for now (the GIP users
2722 * probably won't have it for a wee while either and will do the same).
2723 *
2724 * We could maybe on some platforms try cross calling a CPU with a
2725 * working delta here, but it's not worth the hassle since the
2726 * likelihood of this happening is really low. On Windows, Linux, and
2727 * Solaris timers fire on the CPU they were registered/started on.
2728 * Darwin timers don't necessarily do so (they are high-priority threads).
2729 */
2730 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2731 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2732 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2733 Assert(!ASMIntAreEnabled());
2734 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2735 {
2736 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2737 if (iTscDelta != INT64_MAX)
2738 u64TSC -= iTscDelta;
2739 }
2740 }
2741
2742 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2743
2744 ASMSetFlags(fEFlags);
2745}
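/*
 * INT64_MAX is the GIP-wide sentinel for a TSC delta that hasn't been
 * measured yet; the subtraction above moves the locally read TSC into the
 * master's TSC domain. A GIP consumer applies a measured delta the same
 * way, roughly (illustration only, simplified):
 *
 *     uint64_t uTsc   = ASMReadTSC();
 *     int64_t  iDelta = pGipCpu->i64TSCDelta;
 *     if (iDelta != INT64_MAX)
 *         uTsc -= iDelta;
 */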
2746
2747
2748/**
2749 * Timer callback function for async GIP mode.
2750 * @param pTimer The timer.
2751 * @param pvUser Opaque pointer to the device extension.
2752 * @param iTick The timer tick.
2753 */
2754static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2755{
2756 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2757 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2758 RTCPUID idCpu = RTMpCpuId();
2759 uint64_t u64TSC = ASMReadTSC();
2760 uint64_t NanoTS = RTTimeSystemNanoTS();
2761 RT_NOREF1(pTimer);
2762
2763 /** @todo reset the transaction number and whatnot when iTick == 1. */
2764 if (pDevExt->idGipMaster == idCpu)
2765 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2766 else
2767 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, supdrvGipGetApicId(pDevExt->pGip), iTick);
2768
2769 ASMSetFlags(fEFlags);
2770}
2771
2772
2773
2774
2775/*
2776 *
2777 *
2778 * TSC Delta Measurements And Related Code
2779 * TSC Delta Measurements And Related Code
2780 * TSC Delta Measurements And Related Code
2781 *
2782 *
2783 */
2784
2785
2786/*
2787 * Select TSC delta measurement algorithm.
2788 */
2789#if 0
2790# define GIP_TSC_DELTA_METHOD_1
2791#else
2792# define GIP_TSC_DELTA_METHOD_2
2793#endif
2794
2795/** For padding variables to keep them away from other cache lines. Better too
2796 * large than too small!
2797 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes. There are claims
2798 * that NetBurst had 128-byte cache lines while the 486 thru Pentium
2799 * III had 32-byte cache lines. */
2800#define GIP_TSC_DELTA_CACHE_LINE_SIZE 128
2801
2802
2803/**
2804 * TSC delta measurement algorithm \#2 result entry.
2805 */
2806typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
2807{
2808 uint32_t iSeqMine;
2809 uint32_t iSeqOther;
2810 uint64_t uTsc;
2811} SUPDRVTSCDELTAMETHOD2ENTRY;
2812
2813/**
2814 * TSC delta measurement algorithm \#2 Data.
2815 */
2816typedef struct SUPDRVTSCDELTAMETHOD2
2817{
2818 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2819 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2820 /** The current sequence number of this worker. */
2821 uint32_t volatile iCurSeqNo;
2822 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2823 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2824 /** Result table. */
2825 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2826} SUPDRVTSCDELTAMETHOD2;
2827/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
2828typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
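/*
 * How the method \#2 tables pair up (see the processing loop in
 * supdrvTscDeltaMethod2ProcessDataOnMaster below; the collection side lives
 * elsewhere in this file): each result entry records the owner's sequence
 * number (iSeqMine), the last sequence number observed from the other party
 * (iSeqOther, bit 0 doubling as a validity flag) and the TSC read at that
 * point (uTsc). A hit is found when a master entry's iSeqOther indexes a
 * worker entry whose own iSeqOther names the master entry's iSeqMine; every
 * hit yields one delta candidate.
 */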
2829
2830
2831/**
2832 * The TSC delta synchronization struct, version 2.
2833 *
2834 * The synchronization variable is completely isolated in its own cache line
2835 * (provided our max cache line size estimate is correct).
2836 */
2837typedef struct SUPTSCDELTASYNC2
2838{
2839 /** Padding to make sure uSyncVar is in its own cache line. */
2840 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2841
2842 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2843 volatile uint32_t uSyncVar;
2844 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2845 volatile uint32_t uSyncSeq;
2846
2847 /** Padding to make sure uSyncVar is in its own cache line. */
2848 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2849
2850 /** Start RDTSC value. Put here mainly to save stack space. */
2851 uint64_t uTscStart;
2852 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2853 uint64_t cMaxTscTicks;
2854} SUPTSCDELTASYNC2;
2855AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2856typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2857
2858/** Prestart wait. */
2859#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)
2860/** Prestart aborted. */
2861#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)
2862/** Ready (on your mark). */
2863#define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000)
2864/** Steady (get set). */
2865#define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001)
2866/** Go! */
2867#define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002)
2868/** Used by the verification test. */
2869#define GIP_TSC_DELTA_SYNC2_GO_GO UINT32_C(0x1003)
2870
2871/** We reached the time limit. */
2872#define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe)
2873/** The other party won't touch the sync struct ever again. */
2874#define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)
2875
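/*
 * Typical state progression of one successful round, as implemented by
 * supdrvTscDeltaSync2_Before/After below:
 *
 *      master: READY -> flips worker to STEADY -> waits
 *      worker: sees STEADY -> flips master to STEADY
 *      master: sees STEADY -> flips worker to GO
 *      worker: sees GO -> flips master to GO -> both take measurements
 *      master: done processing -> flips worker back to READY -> next round
 *
 * TIMEOUT marks a side that gave up waiting; the PRESTART_* values belong
 * to the setup/teardown phases handled elsewhere in this file.
 */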
2876
2877/**
2878 * Argument package/state passed by supdrvTscMeasureDeltaOne() to the RTMpOn
2879 * callback worker.
2880 * @todo add
2881 */
2882typedef struct SUPDRVGIPTSCDELTARGS
2883{
2884 /** The device extension. */
2885 PSUPDRVDEVEXT pDevExt;
2886 /** Pointer to the GIP CPU array entry for the worker. */
2887 PSUPGIPCPU pWorker;
2888 /** Pointer to the GIP CPU array entry for the master. */
2889 PSUPGIPCPU pMaster;
2890 /** The maximum number of ticks to spend in supdrvTscMeasureDeltaCallback.
2891 * (This is what we need a rough TSC frequency for.) */
2892 uint64_t cMaxTscTicks;
2893 /** Used to abort synchronization setup. */
2894 bool volatile fAbortSetup;
2895
2896 /** Padding to make sure the master variables live in its own cache lines. */
2897 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2898
2899 /** @name Master
2900 * @{ */
2901 /** The time the master spent in the MP worker. */
2902 uint64_t cElapsedMasterTscTicks;
2903 /** The iTry value when stopped at. */
2904 uint32_t iTry;
2905 /** Set if the run timed out. */
2906 bool volatile fTimedOut;
2907 /** Pointer to the master's synchronization struct (on stack). */
2908 PSUPTSCDELTASYNC2 volatile pSyncMaster;
2909 /** Master data union. */
2910 union
2911 {
2912 /** Data (master) for delta verification. */
2913 struct
2914 {
2915 /** Verification test TSC values for the master. */
2916 uint64_t volatile auTscs[32];
2917 } Verify;
2918 /** Data (master) for measurement method \#2. */
2919 struct
2920 {
2921 /** Data and sequence number. */
2922 SUPDRVTSCDELTAMETHOD2 Data;
2923 /** The lag setting for the next run. */
2924 bool fLag;
2925 /** Number of hits. */
2926 uint32_t cHits;
2927 } M2;
2928 } uMaster;
2929 /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
2930 * VERR_TRY_AGAIN on timeout. */
2931 int32_t rcVerify;
2932#ifdef TSCDELTA_VERIFY_WITH_STATS
2933 /** The maximum difference between TSC read during delta verification. */
2934 int64_t cMaxVerifyTscTicks;
2935 /** The minimum difference between two TSC reads during verification. */
2936 int64_t cMinVerifyTscTicks;
2937 /** The bad TSC diff, worker relative to master (= worker - master).
2938 * Negative value means the worker is behind the master. */
2939 int64_t iVerifyBadTscDiff;
2940#endif
2941 /** @} */
2942
2943 /** Padding to make sure the worker variables live in their own cache lines. */
2944 uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2945
2946 /** @name Proletarian
2947 * @{ */
2948 /** Pointer to the worker's synchronization struct (on stack). */
2949 PSUPTSCDELTASYNC2 volatile pSyncWorker;
2950 /** The time the worker spent in the MP worker. */
2951 uint64_t cElapsedWorkerTscTicks;
2952 /** Worker data union. */
2953 union
2954 {
2955 /** Data (worker) for delta verification. */
2956 struct
2957 {
2958 /** Verification test TSC values for the worker. */
2959 uint64_t volatile auTscs[32];
2960 } Verify;
2961 /** Data (worker) for measurement method \#2. */
2962 struct
2963 {
2964 /** Data and sequence number. */
2965 SUPDRVTSCDELTAMETHOD2 Data;
2966 /** The lag setting for the next run (set by master). */
2967 bool fLag;
2968 } M2;
2969 } uWorker;
2970 /** @} */
2971
2972 /** Padding to make sure the above is in its own cache line. */
2973 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2974} SUPDRVGIPTSCDELTARGS;
2975typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2976
2977
2978 /** @name Macros that implement the basic synchronization steps common to
2979 * the algorithms.
2980 *
2981 * Must be used from a loop as the timeouts are implemented via 'break' statements
2982 * at the moment.
2983 *
2984 * @{
2985 */
2986#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
2987# define TSCDELTA_DBG_VARS() uint32_t iDbgCounter
2988# define TSCDELTA_DBG_START_LOOP() do { iDbgCounter = 0; } while (0)
2989# define TSCDELTA_DBG_CHECK_LOOP() \
2990 do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
2991#else
2992# define TSCDELTA_DBG_VARS() ((void)0)
2993# define TSCDELTA_DBG_START_LOOP() ((void)0)
2994# define TSCDELTA_DBG_CHECK_LOOP() ((void)0)
2995#endif
2996#if 0
2997# define TSCDELTA_DBG_SYNC_MSG(a_Args) SUPR0Printf a_Args
2998#else
2999# define TSCDELTA_DBG_SYNC_MSG(a_Args) ((void)0)
3000#endif
3001#if 0
3002# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
3003#else
3004# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
3005#endif
3006#if 0
3007# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
3008#else
3009# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
3010#endif
3011
3012
3013static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3014 bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
3015{
3016 uint32_t iMySeq = fIsMaster ? 0 : 256;
3017 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
3018 uint32_t u32Tmp;
3019 uint32_t iSync2Loops = 0;
3020 RTCCUINTREG fEFlags;
3021 TSCDELTA_DBG_VARS();
3022
3023#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3024 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
3025#else
3026 *pfEFlags = 0;
3027#endif
3028
3029 /*
3030 * The master tells the worker to get on its mark.
3031 */
3032 if (fIsMaster)
3033 {
3034 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3035 { /* likely*/ }
3036 else
3037 {
3038 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3039 return false;
3040 }
3041 }
3042
3043 /*
3044 * Wait for the on your mark signal (ack in the master case). We process timeouts here.
3045 */
3046 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
3047 for (;;)
3048 {
3049 fEFlags = ASMIntDisableFlags();
3050 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3051 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
3052 break;
3053 ASMSetFlags(fEFlags);
3054 ASMNopPause();
3055
3056 /* Abort? */
3057 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
3058 {
3059 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3060 return false;
3061 }
3062
3063 /* Check for timeouts every so often (not every loop in case RDTSC is
3064 trapping or something). Must check the first time around. */
3065#if 0 /* For debugging the timeout paths. */
3066 static uint32_t volatile xxx;
3067#endif
3068 if ( ( (iSync2Loops & 0x3ff) == 0
3069 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
3070#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
3071 || (!fIsMaster && (++xxx & 0xf) == 0)
3072#endif
3073 )
3074 {
3075 /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
3076 ignore the timeout if we've got the go ahead already (simpler). */
3077 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
3078 {
3079 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
3080 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
3081 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3082 return false;
3083 }
3084 }
3085 iSync2Loops++;
3086 }
3087
3088 /*
3089 * Interrupts are now disabled and will remain disabled until we do
3090 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
3091 */
3092 *pfEFlags = fEFlags;
3093
3094 /*
3095 * The worker tells the master that it is on its mark and that the master
3096 * needs to get into position as well.
3097 */
3098 if (!fIsMaster)
3099 {
3100 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3101 { /* likely */ }
3102 else
3103 {
3104 ASMSetFlags(fEFlags);
3105 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3106 return false;
3107 }
3108 }
3109
3110 /*
3111 * The master sends the 'go' to the worker and waits for the ACK.
3112 */
3113 if (fIsMaster)
3114 {
3115 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3116 { /* likely */ }
3117 else
3118 {
3119 ASMSetFlags(fEFlags);
3120 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3121 return false;
3122 }
3123 }
3124
3125 /*
3126 * Wait for the 'go' signal (ack in the master case).
3127 */
3128 TSCDELTA_DBG_START_LOOP();
3129 for (;;)
3130 {
3131 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3132 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
3133 break;
3134 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
3135 { /* likely */ }
3136 else
3137 {
3138 ASMSetFlags(fEFlags);
3139 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3140 return false;
3141 }
3142
3143 TSCDELTA_DBG_CHECK_LOOP();
3144 ASMNopPause();
3145 }
3146
3147 /*
3148 * The worker acks the 'go' (shouldn't fail).
3149 */
3150 if (!fIsMaster)
3151 {
3152 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3153 { /* likely */ }
3154 else
3155 {
3156 ASMSetFlags(fEFlags);
3157 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3158 return false;
3159 }
3160 }
3161
3162 /*
3163 * Try enter mostly lockstep execution with it.
3164 */
3165 for (;;)
3166 {
3167 uint32_t iOtherSeq1, iOtherSeq2;
3168 ASMCompilerBarrier();
3169 ASMSerializeInstruction();
3170
3171 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
3172 ASMNopPause();
3173 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
3174 ASMNopPause();
3175 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);
3176
3177 ASMCompilerBarrier();
3178 if (iOtherSeq1 == iOtherSeq2)
3179 return true;
3180
3181 /* Did the other guy give up? Should we give up? */
3182 if ( iOtherSeq1 == UINT32_MAX
3183 || iOtherSeq2 == UINT32_MAX)
3184 return true;
3185 if (++iMySeq >= iMaxSeq)
3186 {
3187 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
3188 return true;
3189 }
3190 ASMNopPause();
3191 }
3192}
3193
3194#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3195 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3196 { /*likely*/ } \
3197 else if (true) \
3198 { \
3199 TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
3200 break; \
3201 } else do {} while (0)
3202#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3203 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3204 { /*likely*/ } \
3205 else if (true) \
3206 { \
3207 TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
3208 break; \
3209 } else do {} while (0)
3210
3211
3212static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3213 bool fIsMaster, RTCCUINTREG fEFlags)
3214{
3215 TSCDELTA_DBG_VARS();
3216 RT_NOREF1(pOtherSync);
3217
3218 /*
3219 * Wait for the 'ready' signal. In the master's case, this means the
3220 * worker has completed its data collection, while in the worker's case it
3221 * means the master is done processing the data and it's time for the next
3222 * loop iteration (or whatever).
3223 */
3224 ASMSetFlags(fEFlags);
3225 TSCDELTA_DBG_START_LOOP();
3226 for (;;)
3227 {
3228 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3229 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
3230 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
3231 return true;
3232 ASMNopPause();
3233 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
3234 { /* likely */}
3235 else
3236 {
3237 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
3238 return false; /* shouldn't ever happen! */
3239 }
3240 TSCDELTA_DBG_CHECK_LOOP();
3241 ASMNopPause();
3242 }
3243}
3244
3245#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3246 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
3247 { /* likely */ } \
3248 else if (true) \
3249 { \
3250 TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
3251 break; \
3252 } else do {} while (0)
3253
3254#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
3255 /* \
3256 * Tell the worker that we're done processing the data and ready for the next round. \
3257 */ \
3258 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3259 { /* likely */ } \
3260 else if (true)\
3261 { \
3262 TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3263 break; \
3264 } else do {} while (0)
3265
3266#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3267 if (true) { \
3268 /* \
3269 * Tell the master that we're done collecting data and wait for the next round to start. \
3270 */ \
3271 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3272 { /* likely */ } \
3273 else \
3274 { \
3275 ASMSetFlags(a_fEFlags); \
3276 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3277 break; \
3278 } \
3279 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
3280 { /* likely */ } \
3281 else \
3282 { \
3283 TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
3284 break; \
3285 } \
3286 } else do {} while (0)
3287/** @} */
3288
3289
3290#ifdef GIP_TSC_DELTA_METHOD_1
3291/**
3292 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3293 *
3294 *
3295 * We ignore the first few runs of the loop in order to prime the
3296 * cache. Also, we need to be careful about using the 'pause' instruction
3297 * in critical busy-wait loops in this code - it can cause undesired
3298 * behaviour with hyperthreading.
3299 *
3300 * We try to minimize the measurement error by computing the minimum
3301 * read time of the compare statement in the worker by taking TSC
3302 * measurements across it.
3303 *
3304 * It must be noted that the computed minimum read time is mostly to
3305 * eliminate huge deltas when the worker is too early and doesn't by
3306 * itself help produce more accurate deltas. We allow two times the
3307 * computed minimum as an arbitrary acceptable threshold. Therefore,
3308 * it is still possible to get negative deltas where there are none
3309 * when the worker is earlier. As long as these occasional negative
3310 * deltas are lower than the time it takes to exit guest-context and
3311 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3312 * that jumped backwards. It is due to the existence of the negative
3313 * deltas that we don't recompute the delta with the master and
3314 * worker interchanged to eliminate the remaining measurement error.
3315 *
3316 *
3317 * @param pArgs The argument/state data.
3318 * @param pMySync My synchronization structure.
3319 * @param pOtherSync My partner's synchronization structure.
3320 * @param fIsMaster Set if master, clear if worker.
3321 * @param iTry The attempt number.
3322 */
3323static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3324 bool fIsMaster, uint32_t iTry)
3325{
3326 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3327 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3328 uint64_t uMinCmpReadTime = UINT64_MAX;
3329 unsigned iLoop;
3330 NOREF(iTry);
3331
3332 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3333 {
3334 RTCCUINTREG fEFlags;
3335 if (fIsMaster)
3336 {
3337 /*
3338 * The master.
3339 */
3340 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3341 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3342 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3343 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3344
3345 do
3346 {
3347 ASMSerializeInstruction();
3348 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3349 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3350
3351 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3352
3353 /* Process the data. */
3354 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3355 {
3356 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3357 {
3358 int64_t iDelta = pGipCpuWorker->u64TSCSample
3359 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
3360 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3361 ? iDelta < pGipCpuWorker->i64TSCDelta
3362 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3363 pGipCpuWorker->i64TSCDelta = iDelta;
3364 }
3365 }
3366
3367 /* Reset our TSC sample and tell the worker to move on. */
3368 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3369 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3370 }
3371 else
3372 {
3373 /*
3374 * The worker.
3375 */
3376 uint64_t uTscWorker;
3377 uint64_t uTscWorkerFlushed;
3378 uint64_t uCmpReadTime;
3379
3380 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3381 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3382
3383 /*
3384 * Keep reading the TSC until we notice that the master has read his. Reading
3385 * the TSC -after- the master has updated the memory is way too late. We thus
3386 * compensate by trying to measure how long it took for the worker to notice
3387 * the memory flushed from the master.
3388 */
3389 do
3390 {
3391 ASMSerializeInstruction();
3392 uTscWorker = ASMReadTSC();
3393 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3394 ASMSerializeInstruction();
3395 uTscWorkerFlushed = ASMReadTSC();
3396
3397 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3398 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3399 {
3400 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
3401 if (uCmpReadTime < (uMinCmpReadTime << 1))
3402 {
3403 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3404 if (uCmpReadTime < uMinCmpReadTime)
3405 uMinCmpReadTime = uCmpReadTime;
3406 }
3407 else
3408 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3409 }
3410 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3411 {
3412 if (uCmpReadTime < uMinCmpReadTime)
3413 uMinCmpReadTime = uCmpReadTime;
3414 }
3415
3416 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3417 }
3418 }
3419
3420 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3421 pMySync->uSyncVar));
3422
3423 /*
3424 * We must reset the worker TSC sample value in case it gets picked as a
3425 * GIP master later on (it's trashed above, naturally).
3426 */
3427 if (!fIsMaster)
3428 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3429}
3430#endif /* GIP_TSC_DELTA_METHOD_1 */
3431
3432
3433#ifdef GIP_TSC_DELTA_METHOD_2
3434/*
3435 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3436 */
3437
3438# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3439# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3440
3441
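/**
 * Processes the data collected by supdrvTscDeltaMethod2CollectData on the
 * master, matching up master and worker samples via their sequence numbers.
 *
 * A master entry is only usable if it caught the worker mid-measurement (odd
 * worker sequence number) and the two sides recorded matching sequence
 * numbers. Each such hit yields a TSC-delta candidate; the best candidate is
 * kept if we got more than two hits.
 *
 * @param   pArgs               The argument/state data.
 */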
3442static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3443{
3444 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3445 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3446 uint32_t idxResult;
3447 uint32_t cHits = 0;
3448
3449 /*
3450 * Look for matching entries in the master and worker tables.
3451 */
3452 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3453 {
3454 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3455 if (idxOther & 1)
3456 {
3457 idxOther >>= 1;
3458 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3459 {
3460 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3461 {
3462 int64_t iDelta;
3463 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3464 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3465 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3466 ? iDelta < iBestDelta
3467 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3468 iBestDelta = iDelta;
3469 cHits++;
3470 }
3471 }
3472 }
3473 }
3474
3475 /*
3476 * Save the results.
3477 */
3478 if (cHits > 2)
3479 pArgs->pWorker->i64TSCDelta = iBestDelta;
3480 pArgs->uMaster.M2.cHits += cHits;
3481}
3482
3483
3484/**
3485 * The core function of the 2nd TSC delta measurement algorithm.
3486 *
3487 * The idea here is that we have the two CPUs execute the exact same code
3488 * collecting a largish set of TSC samples. The code has one data dependency
3489 * on the other CPU, intended both to synchronize the execution and to help
3490 * cross-reference the two sets of TSC samples (the sequence numbers).
3491 *
3492 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3493 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
3494 * it will help with making the CPUs enter lock-step execution occasionally.
3495 *
3496 */
3497static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3498{
3499 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3500 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3501
3502 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3503 ASMSerializeInstruction();
3504 while (cLeft-- > 0)
3505 {
3506 uint64_t uTsc;
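        /* The increment below makes our sequence number odd, signalling to the
           other CPU that a sample is in flight; the second increment after the
           TSC read makes it even again. Only samples taken while the partner's
           count was odd can be matched up later on the master. */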
3507 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3508 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3509 ASMCompilerBarrier();
3510 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3511 uTsc = ASMReadTSC();
3512 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3513 ASMCompilerBarrier();
3514 ASMSerializeInstruction();
3515 pEntry->iSeqMine = iSeqMine;
3516 pEntry->iSeqOther = iSeqOther;
3517 pEntry->uTsc = uTsc;
3518 pEntry++;
3519 ASMSerializeInstruction();
3520 if (fLag)
3521 ASMNopPause();
3522 }
3523}
3524
3525
3526/**
3527 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3528 *
3529 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3530 *
3531 * @param pArgs The argument/state data.
3532 * @param pMySync My synchronization structure.
3533 * @param pOtherSync My partner's synchronization structure.
3534 * @param fIsMaster Set if master, clear if worker.
3535 * @param iTry The attempt number.
3536 */
3537static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3538 bool fIsMaster, uint32_t iTry)
3539{
3540 unsigned iLoop;
3541 RT_NOREF1(iTry);
3542
3543 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3544 {
3545 RTCCUINTREG fEFlags;
3546 if (fIsMaster)
3547 {
3548 /*
3549 * Adjust the loop lag fudge.
3550 */
3551# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3552 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3553 {
3554 /* Lag during the priming to be nice to everyone.. */
3555 pArgs->uMaster.M2.fLag = true;
3556 pArgs->uWorker.M2.fLag = true;
3557 }
3558 else
3559# endif
3560 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3561 {
3562 /* 25 % of the body without lagging. */
3563 pArgs->uMaster.M2.fLag = false;
3564 pArgs->uWorker.M2.fLag = false;
3565 }
3566 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3567 {
3568 /* 25 % of the body with both lagging. */
3569 pArgs->uMaster.M2.fLag = true;
3570 pArgs->uWorker.M2.fLag = true;
3571 }
3572 else
3573 {
3574 /* 50% of the body with alternating lag. */
3575 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3576                pArgs->uWorker.M2.fLag = (iLoop & 1) == 1;
3577 }
3578
3579 /*
3580 * Sync up with the worker and collect data.
3581 */
3582 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3583 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3584 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3585
3586 /*
3587 * Process the data.
3588 */
3589# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3590 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3591# endif
3592 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
3593
3594 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3595 }
3596 else
3597 {
3598 /*
3599 * The worker.
3600 */
3601 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3602 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3603 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3604 }
3605 }
3606}
3607
3608#endif /* GIP_TSC_DELTA_METHOD_2 */
3609
3610
3611
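/**
 * Verifies a TSC-delta by taking interleaved TSC readings on the master and
 * worker CPUs, the two taking turns via the sync variable, and checking that
 * the delta-adjusted readings never decrease across the combined sequence.
 *
 * @returns pArgs->rcVerify (VINF_SUCCESS or VERR_OUT_OF_RANGE) on completion,
 *          VERR_TIMEOUT if the synchronization dance timed out and the caller
 *          should retry.
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iWorkerTscDelta     The worker TSC-delta to verify.
 */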
3612static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
3613 PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
3614{
3615 /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
3616 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3617 uint32_t i;
3618 TSCDELTA_DBG_VARS();
3619
3620 for (;;)
3621 {
3622 RTCCUINTREG fEFlags;
3623 AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
3624 AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));
3625
3626 if (fIsMaster)
3627 {
3628 uint64_t uTscWorker;
3629 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3630
3631            /*
3632             * Collect TSC, master goes first (the CPUs alternate by toggling the sync variable between GO and GO_GO).
3633             */
3634 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
3635 {
3636 /* Read, kick & wait #1. */
3637 uint64_t uTsc = ASMReadTSC();
3638 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3639 ASMSerializeInstruction();
3640 pArgs->uMaster.Verify.auTscs[i] = uTsc;
3641 TSCDELTA_DBG_START_LOOP();
3642 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3643 {
3644 TSCDELTA_DBG_CHECK_LOOP();
3645 ASMNopPause();
3646 }
3647
3648 /* Read, kick & wait #2. */
3649 uTsc = ASMReadTSC();
3650 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3651 ASMSerializeInstruction();
3652 pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
3653 TSCDELTA_DBG_START_LOOP();
3654 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3655 {
3656 TSCDELTA_DBG_CHECK_LOOP();
3657 ASMNopPause();
3658 }
3659 }
3660
3661 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3662
3663 /*
3664 * Process the data.
3665 */
3666#ifdef TSCDELTA_VERIFY_WITH_STATS
3667 pArgs->cMaxVerifyTscTicks = INT64_MIN;
3668 pArgs->cMinVerifyTscTicks = INT64_MAX;
3669 pArgs->iVerifyBadTscDiff = 0;
3670#endif
3671 ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
3672 uTscWorker = 0;
3673 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
3674 {
3675 /* Master vs previous worker entry. */
3676 uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
3677 int64_t iDiff;
3678 if (i > 0)
3679 {
3680 iDiff = uTscMaster - uTscWorker;
3681#ifdef TSCDELTA_VERIFY_WITH_STATS
3682 if (iDiff > pArgs->cMaxVerifyTscTicks)
3683 pArgs->cMaxVerifyTscTicks = iDiff;
3684 if (iDiff < pArgs->cMinVerifyTscTicks)
3685 pArgs->cMinVerifyTscTicks = iDiff;
3686#endif
3687 if (iDiff < 0)
3688 {
3689#ifdef TSCDELTA_VERIFY_WITH_STATS
3690 pArgs->iVerifyBadTscDiff = -iDiff;
3691#endif
3692 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3693 break;
3694 }
3695 }
3696
3697 /* Worker vs master. */
3698 uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
3699 iDiff = uTscWorker - uTscMaster;
3700#ifdef TSCDELTA_VERIFY_WITH_STATS
3701 if (iDiff > pArgs->cMaxVerifyTscTicks)
3702 pArgs->cMaxVerifyTscTicks = iDiff;
3703 if (iDiff < pArgs->cMinVerifyTscTicks)
3704 pArgs->cMinVerifyTscTicks = iDiff;
3705#endif
3706 if (iDiff < 0)
3707 {
3708#ifdef TSCDELTA_VERIFY_WITH_STATS
3709 pArgs->iVerifyBadTscDiff = iDiff;
3710#endif
3711 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3712 break;
3713 }
3714 }
3715
3716 /* Done. */
3717 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3718 }
3719 else
3720 {
3721 /*
3722 * The worker, master leads.
3723 */
3724 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3725
3726 for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
3727 {
3728 uint64_t uTsc;
3729
3730 /* Wait, Read and Kick #1. */
3731 TSCDELTA_DBG_START_LOOP();
3732 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3733 {
3734 TSCDELTA_DBG_CHECK_LOOP();
3735 ASMNopPause();
3736 }
3737 uTsc = ASMReadTSC();
3738 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3739 ASMSerializeInstruction();
3740 pArgs->uWorker.Verify.auTscs[i] = uTsc;
3741
3742 /* Wait, Read and Kick #2. */
3743 TSCDELTA_DBG_START_LOOP();
3744 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3745 {
3746 TSCDELTA_DBG_CHECK_LOOP();
3747 ASMNopPause();
3748 }
3749 uTsc = ASMReadTSC();
3750 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3751 ASMSerializeInstruction();
3752 pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
3753 }
3754
3755 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3756 }
3757 return pArgs->rcVerify;
3758 }
3759
3760 /*
3761 * Timed out, please retry.
3762 */
3763 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
3764 return VERR_TIMEOUT;
3765}
3766
3767
3768
3769/**
3770 * Handles the special abort procedure during synchronization setup in
3771 * supdrvTscMeasureDeltaCallbackUnwrapped().
3772 *
3773 * @returns 0 (dummy, ignored)
3774 * @param pArgs Pointer to argument/state data.
3775 * @param pMySync Pointer to my sync structure.
3776 * @param fIsMaster Set if we're the master, clear if worker.
3777 * @param fTimeout Set if it's a timeout.
3778 */
3779DECL_NO_INLINE(static, int)
3780supdrvTscMeasureDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3781{
3782 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3783 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3784 TSCDELTA_DBG_VARS();
3785 RT_NOREF1(pMySync);
3786
3787 /*
3788 * Clear our sync pointer and make sure the abort flag is set.
3789 */
3790 ASMAtomicWriteNullPtr(ppMySync);
3791 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3792 if (fTimeout)
3793 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3794
3795 /*
3796 * Make sure the other party is out of there and won't be touching our
3797 * sync state again (would cause stack corruption).
3798 */
3799 TSCDELTA_DBG_START_LOOP();
3800 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3801 {
3802 ASMNopPause();
3803 ASMNopPause();
3804 ASMNopPause();
3805 TSCDELTA_DBG_CHECK_LOOP();
3806 }
3807
3808 return 0;
3809}
3810
3811
3812/**
3813 * This is used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
3814 * and compute the delta between them.
3815 *
3816 * To reduce code size a good deal when timeout handling was added, a dummy
3817 * return value had to be added (saves 1-3 lines per timeout case), thus this
3818 * 'Unwrapped' function and the dummy 0 return value.
3819 *
3820 * @returns 0 (dummy, ignored)
3821 * @param   idCpu               The CPU we are currently scheduled on.
3822 * @param pArgs Pointer to a parameter package.
3823 *
3824 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3825 * read the TSC at exactly the same time on both the master and the
3826 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3827 * contention, SMI, pipelining etc. there is no guaranteed way of
3828 * doing this on x86 CPUs.
3829 */
3830static int supdrvTscMeasureDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3831{
3832 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3833 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3834 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3835 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3836 uint32_t iTry;
3837 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3838 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3839 SUPTSCDELTASYNC2 MySync;
3840 PSUPTSCDELTASYNC2 pOtherSync;
3841 int rc;
3842 TSCDELTA_DBG_VARS();
3843
3844 /* A bit of paranoia first. */
3845 if (!pGipCpuMaster || !pGipCpuWorker)
3846 return 0;
3847
3848 /*
3849 * If the CPU isn't part of the measurement, return immediately.
3850 */
3851 if ( !fIsMaster
3852 && idCpu != pGipCpuWorker->idCpu)
3853 return 0;
3854
3855 /*
3856 * Set up my synchronization stuff and wait for the other party to show up.
3857 *
3858 * We don't wait forever since the other party may be off fishing (offline,
3859 * spinning with ints disabled, whatever); we must play nice with the rest
3860 * of the system as this context generally isn't one in which we will get
3861 * preempted and we may hold up a number of lower priority interrupts.
3862 */
3863 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3864 ASMAtomicWritePtr(ppMySync, &MySync);
3865 MySync.uTscStart = ASMReadTSC();
3866 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3867
3868 /* Look for the partner, might not be here yet... Special abort considerations. */
3869 iTry = 0;
3870 TSCDELTA_DBG_START_LOOP();
3871 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3872 {
3873 ASMNopPause();
3874 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3875 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3876 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3877 if ( (iTry++ & 0xff) == 0
3878 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3879 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3880 TSCDELTA_DBG_CHECK_LOOP();
3881 ASMNopPause();
3882 }
3883
3884 /* I found my partner, waiting to be found... Special abort considerations. */
3885 if (fIsMaster)
3886        if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* paranoia */
3887 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3888
3889 iTry = 0;
3890 TSCDELTA_DBG_START_LOOP();
3891 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3892 {
3893 ASMNopPause();
3894 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3895 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3896 if ( (iTry++ & 0xff) == 0
3897 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3898 {
3899 if ( fIsMaster
3900 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3901                break; /* race #1: the worker has moved on, handle timeout in loop instead. */
3902 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3903 }
3904 TSCDELTA_DBG_CHECK_LOOP();
3905 }
3906
3907 if (!fIsMaster)
3908 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3909 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3910
3911/** @todo Add a resumable state to pArgs so we don't waste time if we time
3912 * out or something. Timeouts are legit, any of the two CPUs may get
3913 * interrupted. */
3914
3915 /*
3916 * Start by seeing if we have a zero delta between the two CPUs.
3917 * This should normally be the case.
3918 */
3919 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3920 if (RT_SUCCESS(rc))
3921 {
3922 if (fIsMaster)
3923 {
3924 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3925 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3926 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3927 }
3928 }
3929 /*
3930 * If the verification didn't time out, do regular delta measurements.
3931 * We retry this until we get a reasonable value.
3932 */
3933 else if (rc != VERR_TIMEOUT)
3934 {
3935 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3936 for (iTry = 0; iTry < 12; iTry++)
3937 {
3938 /*
3939 * Check the state before we start.
3940 */
3941 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3942 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3943 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3944 {
3945 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3946 break;
3947 }
3948
3949 /*
3950 * Do the measurements.
3951 */
3952#ifdef GIP_TSC_DELTA_METHOD_1
3953 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3954#elif defined(GIP_TSC_DELTA_METHOD_2)
3955 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3956#else
3957# error "huh??"
3958#endif
3959
3960 /*
3961 * Check the state.
3962 */
3963 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3964 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3965 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3966 {
3967 if (fIsMaster)
3968 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3969 else
3970 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3971 break;
3972 }
3973
3974 /*
3975 * Success? If so, stop trying. Master decides.
3976 */
3977 if (fIsMaster)
3978 {
3979 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3980 {
3981 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3982 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3983 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3984 break;
3985 }
3986 }
3987 }
3988 if (fIsMaster)
3989 pArgs->iTry = iTry;
3990 }
3991
3992 /*
3993 * End the synchronization dance. We tell the other that we're done,
3994 * then wait for the same kind of reply.
3995 */
3996 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3997 ASMAtomicWriteNullPtr(ppMySync);
3998 iTry = 0;
3999 TSCDELTA_DBG_START_LOOP();
4000 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
4001 {
4002 iTry++;
4003 if ( iTry == 0
4004 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
4005 break; /* this really shouldn't happen. */
4006 TSCDELTA_DBG_CHECK_LOOP();
4007 ASMNopPause();
4008 }
4009
4010 /*
4011 * Collect some runtime stats.
4012 */
4013 if (fIsMaster)
4014 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
4015 else
4016 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
4017 return 0;
4018}
4019
4020/**
4021 * Callback used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
4022 * and compute the delta between them.
4023 *
4024 * @param   idCpu       The CPU we are currently scheduled on.
4025 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
4026 * @param pvUser2 Unused.
4027 */
4028static DECLCALLBACK(void) supdrvTscMeasureDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4029{
4030 supdrvTscMeasureDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
4031 RT_NOREF1(pvUser2);
4032}
4033
4034
4035/**
4036 * Measures the TSC delta between the master GIP CPU and one specified worker
4037 * CPU.
4038 *
4039 * @returns VBox status code.
4040 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
4041 * failure.
4042 * @param pDevExt Pointer to the device instance data.
4043 * @param idxWorker The index of the worker CPU from the GIP's array of
4044 * CPUs.
4045 *
4046 * @remarks This must be called with preemption enabled!
4047 */
4048static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
4049{
4050 int rc;
4051 int rc2;
4052 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4053 RTCPUID idMaster = pDevExt->idGipMaster;
4054 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
4055 PSUPGIPCPU pGipCpuMaster;
4056 uint32_t iGipCpuMaster;
4057#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4058 uint32_t u32Tmp;
4059#endif
4060
4061 /* Validate input a bit. */
4062 AssertReturn(pGip, VERR_INVALID_PARAMETER);
4063 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4064 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4065
4066 /*
4067 * Don't attempt measuring the delta for the GIP master.
4068 */
4069 if (pGipCpuWorker->idCpu == idMaster)
4070 {
4071 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
4072 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
4073 return VINF_SUCCESS;
4074 }
4075
4076 /*
4077 * One measurement at a time, at least for now. We might be using
4078 * broadcast IPIs, so be nice to the rest of the system.
4079 */
4080#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4081 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
4082#else
4083 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
4084#endif
4085 if (RT_FAILURE(rc))
4086 return rc;
4087
4088 /*
4089 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
4090 * try to pick a different master. (This fudge only works with multi-core systems.)
4091 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
4092 *
4093 * We skip this on AMDs for now as their HTT is different from Intel's and
4094 * it doesn't seem to have any favorable effect on the results.
4095 *
4096 * If the master is offline, we need a new master too, so share the code.
4097 */
4098 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
4099 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
4100 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
4101#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4102 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
4103 && pGip->cOnlineCpus > 2
4104 && ASMHasCpuId()
4105 && RTX86IsValidStdRange(ASMCpuId_EAX(0))
4106 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
4107 && ( !ASMIsAmdCpu()
4108 || RTX86GetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
4109 || ( RTX86GetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
4110 && RTX86GetCpuModelAMD(u32Tmp) >= 0x02) ) )
4111 || !RTMpIsCpuOnline(idMaster) )
4112 {
4113 uint32_t i;
4114 for (i = 0; i < pGip->cCpus; i++)
4115 if ( i != iGipCpuMaster
4116 && i != idxWorker
4117 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
4118 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
4119 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
4120 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
4121 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
4122 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
4123 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
4124 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
4125 {
4126 iGipCpuMaster = i;
4127 pGipCpuMaster = &pGip->aCPUs[i];
4128 idMaster = pGipCpuMaster->idCpu;
4129 break;
4130 }
4131 }
4132#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
4133
4134 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
4135 {
4136 /*
4137 * Initialize data package for the RTMpOnPair callback.
4138 */
4139 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
4140 if (pArgs)
4141 {
4142 pArgs->pWorker = pGipCpuWorker;
4143 pArgs->pMaster = pGipCpuMaster;
4144 pArgs->pDevExt = pDevExt;
4145 pArgs->pSyncMaster = NULL;
4146 pArgs->pSyncWorker = NULL;
4147 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
4148
4149 /*
4150             * Do the RTMpOnPair call. We reset i64TSCDelta first so that both we
4151             * and supdrvTscMeasureDeltaCallback can use it as a success check.
4152 */
4153 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
4154 * that when doing the restart loop reorg. */
4155 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
4156 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
4157 supdrvTscMeasureDeltaCallback, pArgs, NULL);
4158 if (RT_SUCCESS(rc))
4159 {
4160#if 0
4161 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
4162 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
4163 pArgs->fTimedOut ? " timed out" :"");
4164#endif
4165#if 0
4166 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
4167 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
4168#endif
4169 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
4170 {
4171 /*
4172 * Work the TSC delta applicability rating. It starts
4173                     * optimistic in supdrvGipInit; we downgrade it here.
4174 */
4175 SUPGIPUSETSCDELTA enmRating;
4176 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
4177 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
4178 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
4179 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
4180 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
4181 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
4182 else
4183 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
4184 if (pGip->enmUseTscDelta < enmRating)
4185 {
4186 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
4187 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
4188 }
4189 }
4190 else
4191 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4192 }
4193 /** @todo return try-again if we get an offline CPU error. */
4194
4195 RTMemFree(pArgs);
4196 }
4197 else
4198 rc = VERR_NO_MEMORY;
4199 }
4200 else
4201 rc = VERR_CPU_OFFLINE;
4202
4203 /*
4204 * We're done now.
4205 */
4206#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4207 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4208#else
4209 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4210#endif
4211 return rc;
4212}
4213
4214
4215/**
4216 * Resets the TSC-delta related TSC samples and optionally the deltas
4217 * themselves.
4218 *
4219 * @param pDevExt Pointer to the device instance data.
4220 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
4221 *
4222 * @remarks This might be called while holding a spinlock!
4223 */
4224static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
4225{
4226 unsigned iCpu;
4227 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4228 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4229 {
4230 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
4231 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
4232 if (fResetTscDeltas)
4233 {
4234 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
4235 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
4236 }
4237 }
4238}
4239
4240
4241/**
4242 * Picks an online CPU as the master TSC for TSC-delta computations.
4243 *
4244 * @returns VBox status code.
4245 * @param pDevExt Pointer to the device instance data.
4246 * @param pidxMaster Where to store the CPU array index of the chosen
4247 * master. Optional, can be NULL.
4248 */
4249static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
4250{
4251 /*
4252 * Pick the first CPU online as the master TSC and make it the new GIP master based
4253 * on the APIC ID.
4254 *
4255 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
4256     * in most cases, making it nicer/easier for comparisons. It is safe to update the GIP
4257     * master at this point since the sync/async timer isn't created yet.
4258 */
4259 unsigned iCpu;
4260 uint32_t idxMaster = UINT32_MAX;
4261 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4262 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
4263 {
4264 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
4265 if (idxCpu != UINT16_MAX)
4266 {
4267 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
4268 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
4269 {
4270 idxMaster = idxCpu;
4271 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4272 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4273 if (pidxMaster)
4274 *pidxMaster = idxMaster;
4275 return VINF_SUCCESS;
4276 }
4277 }
4278 }
4279 return VERR_CPU_OFFLINE;
4280}
4281
4282
4283/**
4284 * Performs the initial measurements of the TSC deltas between CPUs.
4285 *
4286 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4287 * triggered by it if threaded.
4288 *
4289 * @returns VBox status code.
4290 * @param pDevExt Pointer to the device instance data.
4291 *
4292 * @remarks Must be called only after supdrvGipInitOnCpu() as this function
4293 *          uses the idCpu members and GIP's online CPU set, which are
4294 *          populated by supdrvGipInitOnCpu().
4295 */
4296static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt)
4297{
4298 PSUPGIPCPU pGipCpuMaster;
4299 unsigned iCpu;
4300 unsigned iOddEven;
4301 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4302 uint32_t idxMaster = UINT32_MAX;
4303 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4304
4305 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4306    supdrvTscResetSamples(pDevExt, true /* fResetTscDeltas */);
4307 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4308 if (RT_FAILURE(rc))
4309 {
4310 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4311 return rc;
4312 }
4313 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4314 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4315 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4316
4317 /*
4318 * If there is only a single CPU online we have nothing to do.
4319 */
4320 if (pGip->cOnlineCpus <= 1)
4321 {
4322 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4323 return VINF_SUCCESS;
4324 }
4325
4326 /*
4327 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4328 * master). We do the CPUs with the even numbered APIC IDs first so that
4329 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4330 */
4331 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4332 {
4333 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4334 {
4335 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4336 if ( iCpu != idxMaster
4337 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4338 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4339 {
4340 rc = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4341 if (RT_FAILURE(rc))
4342 {
4343                    SUPR0Printf("supdrvTscMeasureDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u (idGipMaster=%u)\n",
4344                                rc, iCpu, pGipCpuWorker->idCpu, idxMaster, pGipCpuMaster->idCpu, pDevExt->idGipMaster);
4345 break;
4346 }
4347
4348 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4349 {
4350 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4351 rc = VERR_TRY_AGAIN;
4352 break;
4353 }
4354 }
4355 }
4356 }
4357
4358 return rc;
4359}
4360
4361
4362#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4363
4364/**
4365 * Switches the TSC-delta measurement thread into the butchered state.
4366 *
4367 * @returns VBox status code.
4368 * @param pDevExt Pointer to the device instance data.
4369 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4370 * @param pszFailed An error message to log.
4371 * @param rcFailed The error code to exit the thread with.
4372 */
4373static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4374{
4375 if (!fSpinlockHeld)
4376 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4377
4378 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4379 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4380 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4381 return rcFailed;
4382}
4383
4384
4385/**
4386 * The TSC-delta measurement thread.
4387 *
4388 * @returns VBox status code.
4389 * @param hThread The thread handle.
4390 * @param pvUser Opaque pointer to the device instance data.
4391 */
4392static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4393{
4394 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4395 int rc = VERR_INTERNAL_ERROR_2;
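    /*
     * The state machine, in brief: Creating -> Listening; Listening ->
     * WaitAndMeasure -> Measuring -> Listening; Terminating -> Destroyed;
     * failures land in Butchered via supdrvTscDeltaThreadButchered().
     */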
4396 for (;;)
4397 {
4398 /*
4399 * Switch on the current state.
4400 */
4401 SUPDRVTSCDELTATHREADSTATE enmState;
4402 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4403 enmState = pDevExt->enmTscDeltaThreadState;
4404 switch (enmState)
4405 {
4406 case kTscDeltaThreadState_Creating:
4407 {
4408 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4409 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4410 if (RT_FAILURE(rc))
4411 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4412 RT_FALL_THRU();
4413 }
4414
4415 case kTscDeltaThreadState_Listening:
4416 {
4417 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4418
4419 /*
4420 * Linux counts uninterruptible sleeps as load, hence we shall do a
4421 * regular, interruptible sleep here and ignore wake ups due to signals.
4422 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4423 */
4424 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4425 if ( RT_FAILURE(rc)
4426 && rc != VERR_TIMEOUT
4427 && rc != VERR_INTERRUPTED)
4428                return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWaitNoResume", rc);
4429 RTThreadUserReset(hThread);
4430 break;
4431 }
4432
4433 case kTscDeltaThreadState_WaitAndMeasure:
4434 {
4435 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4436 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4437 if (RT_FAILURE(rc))
4438 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4439 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4440 RTThreadSleep(1);
4441 RT_FALL_THRU();
4442 }
4443
4444 case kTscDeltaThreadState_Measuring:
4445 {
4446 if (pDevExt->fTscThreadRecomputeAllDeltas)
4447 {
4448 int cTries = 8;
4449 int cMsWaitPerTry = 10;
4450 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4451 Assert(pGip);
4452 do
4453 {
4454 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4455 rc = supdrvTscMeasureInitialDeltas(pDevExt);
4456 if ( RT_SUCCESS(rc)
4457 || ( RT_FAILURE(rc)
4458 && rc != VERR_TRY_AGAIN
4459 && rc != VERR_CPU_OFFLINE))
4460 {
4461 break;
4462 }
4463 RTThreadSleep(cMsWaitPerTry);
4464 } while (cTries-- > 0);
4465 pDevExt->fTscThreadRecomputeAllDeltas = false;
4466 }
4467 else
4468 {
4469 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4470 unsigned iCpu;
4471
4472 /* Measure TSC-deltas only for the CPUs that are in the set. */
4473 rc = VINF_SUCCESS;
4474 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4475 {
4476 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4477 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4478 {
4479 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4480 {
4481 int rc2 = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4482 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4483 rc = rc2;
4484 }
4485 else
4486 {
4487 /*
4488                         * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex();
4489 * mark the delta as fine to get the timer thread off our back.
4490 */
4491 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4492 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4493 }
4494 }
4495 }
4496 }
4497 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4498 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4499 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4500 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4501 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4502 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4503 break;
4504 }
4505
4506 case kTscDeltaThreadState_Terminating:
4507 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4508 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4509 return VINF_SUCCESS;
4510
4511 case kTscDeltaThreadState_Butchered:
4512 default:
4513 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4514 }
4515 }
4516 /* not reached */
4517}
4518
4519
4520/**
4521 * Waits for the TSC-delta measurement thread to respond to a state change.
4522 *
4523 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4524 * other error code on internal error.
4525 *
4526 * @param pDevExt The device instance data.
4527 * @param enmCurState The current state.
4528 * @param enmNewState The new state we're waiting for it to enter.
4529 */
4530static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4531 SUPDRVTSCDELTATHREADSTATE enmNewState)
4532{
4533 SUPDRVTSCDELTATHREADSTATE enmActualState;
4534 int rc;
4535
4536 /*
4537 * Wait a short while for the expected state transition.
4538 */
4539 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4540 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4541 enmActualState = pDevExt->enmTscDeltaThreadState;
4542 if (enmActualState == enmNewState)
4543 {
4544 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4545 rc = VINF_SUCCESS;
4546 }
4547 else if (enmActualState == enmCurState)
4548 {
4549 /*
4550 * Wait longer if the state has not yet transitioned to the one we want.
4551 */
4552 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4553 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4554 if ( RT_SUCCESS(rc)
4555 || rc == VERR_TIMEOUT)
4556 {
4557 /*
4558             * Check the state to see whether we've succeeded.
4559 */
4560 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4561 enmActualState = pDevExt->enmTscDeltaThreadState;
4562 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4563 if (enmActualState == enmNewState)
4564 rc = VINF_SUCCESS;
4565 else if (enmActualState == enmCurState)
4566 {
4567 rc = VERR_TIMEOUT;
4568 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4569 enmActualState, enmNewState));
4570 }
4571 else
4572 {
4573 rc = VERR_INTERNAL_ERROR;
4574 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4575 enmActualState, enmNewState));
4576 }
4577 }
4578 else
4579 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4580 }
4581 else
4582 {
4583 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4584 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4585 enmActualState, enmCurState, enmNewState));
4586 rc = VERR_INTERNAL_ERROR;
4587 }
4588
4589 return rc;
4590}
4591
4592
4593/**
4594 * Signals the TSC-delta thread to start measuring TSC-deltas.
4595 *
4596 * @param pDevExt Pointer to the device instance data.
4597 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4598 */
4599static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4600{
4601 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4602 {
4603 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4604 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4605 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4606 {
4607 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4608 if (fForceAll)
4609 pDevExt->fTscThreadRecomputeAllDeltas = true;
4610 }
4611 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4612 && fForceAll)
4613 pDevExt->fTscThreadRecomputeAllDeltas = true;
4614 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4615 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4616 }
4617}
4618
4619
4620/**
4621 * Terminates the actual thread running supdrvTscDeltaThread().
4622 *
4623 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4624 * supdrvTscDeltaTerm().
4625 *
4626 * @param pDevExt Pointer to the device instance data.
4627 */
4628static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4629{
4630 int rc;
4631 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4632 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4633 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4634 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4635 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4636 if (RT_FAILURE(rc))
4637 {
4638 /* Signal a few more times before giving up. */
4639 int cTriesLeft = 5;
4640 while (--cTriesLeft > 0)
4641 {
4642 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4643 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4644 if (rc != VERR_TIMEOUT)
4645 break;
4646 }
4647 }
4648}
4649
4650
4651/**
4652 * Initializes and spawns the TSC-delta measurement thread.
4653 *
4654 * A thread is required for servicing re-measurement requests from events like
4655 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4656 * under all contexts on all OSs.
4657 *
4658 * @returns VBox status code.
4659 * @param pDevExt Pointer to the device instance data.
4660 *
4661 * @remarks Must only be called -after- initializing GIP and setting up MP
4662 * notifications!
4663 */
4664static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4665{
4666 int rc;
4667 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4668 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4669 if (RT_SUCCESS(rc))
4670 {
4671 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4672 if (RT_SUCCESS(rc))
4673 {
4674 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4675 pDevExt->cMsTscDeltaTimeout = 60000;
4676 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4677 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4678 if (RT_SUCCESS(rc))
4679 {
4680 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4681 if (RT_SUCCESS(rc))
4682 {
4683 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4684 return rc;
4685 }
4686
4687                OSDBGPRINT(("supdrvTscDeltaThreadInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4688 supdrvTscDeltaThreadTerminate(pDevExt);
4689 }
4690 else
4691                OSDBGPRINT(("supdrvTscDeltaThreadInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4692 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4693 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4694 }
4695 else
4696            OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4697 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4698 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4699 }
4700 else
4701        OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4702
4703 return rc;
4704}
4705
4706
4707/**
4708 * Terminates the TSC-delta measurement thread and cleanup.
4709 *
4710 * @param pDevExt Pointer to the device instance data.
4711 */
4712static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4713{
4714 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4715 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4716 {
4717 supdrvTscDeltaThreadTerminate(pDevExt);
4718 }
4719
4720 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4721 {
4722 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4723 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4724 }
4725
4726 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4727 {
4728 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4729 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4730 }
4731
4732 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4733}
4734
4735#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4736
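/*
 * Illustrative ring-0 usage sketch for SUPR0TscDeltaMeasureBySetIndex below
 * (not driver code; pSession and iCpuSet are assumed to come from the
 * caller's context). The literals are cMsWaitRetry=2, cMsWaitThread=5000 and
 * cTries=0, the latter selecting the default of 12 tries:
 *
 *      int rc = SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet,
 *                                              SUP_TSCDELTA_MEASURE_F_FORCE,
 *                                              2, 5000, 0);
 */
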
4737/**
4738 * Measure the TSC delta for the CPU given by its CPU set index.
4739 *
4740 * @returns VBox status code.
4741 * @retval VERR_INTERRUPTED if interrupted while waiting.
4742 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4743 * measurement.
4744 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4745 *
4746 * @param pSession The caller's session. GIP must've been mapped.
4747 * @param iCpuSet The CPU set index of the CPU to measure.
4748 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4749 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4750 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4751 * ready.
4752 * @param cTries Number of times to try, pass 0 for the default.
4753 */
4754SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4755 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4756{
4757 PSUPDRVDEVEXT pDevExt;
4758 PSUPGLOBALINFOPAGE pGip;
4759 uint16_t iGipCpu;
4760 int rc;
4761#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4762 uint64_t msTsStartWait;
4763 uint32_t iWaitLoop;
4764#endif
4765
4766 /*
4767 * Validate and adjust the input.
4768 */
4769 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4770 if (!pSession->fGipReferenced)
4771 return VERR_WRONG_ORDER;
4772
4773 pDevExt = pSession->pDevExt;
4774 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4775
4776 pGip = pDevExt->pGip;
4777 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4778
4779 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4780 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4781 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4782 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4783
4784 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4785 return VERR_INVALID_FLAGS;
4786
4787 /*
4788 * The request is a noop if the TSC delta isn't being used.
4789 */
4790 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4791 return VINF_SUCCESS;
4792
4793 if (cTries == 0)
4794 cTries = 12;
4795 else if (cTries > 256)
4796 cTries = 256;
4797
4798 if (cMsWaitRetry == 0)
4799 cMsWaitRetry = 2;
4800 else if (cMsWaitRetry > 1000)
4801 cMsWaitRetry = 1000;
4802
4803#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4804 /*
4805 * Has the TSC already been measured and we're not forced to redo it?
4806 */
4807 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4808 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4809 return VINF_SUCCESS;
4810
4811 /*
4812 * Asynchronous request? Forward it to the thread, no waiting.
4813 */
4814 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4815 {
4816 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4817 * to pass those options to the thread somehow and implement it in the
4818 * thread. Check if anyone uses/needs fAsync before implementing this. */
4819 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4820 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4821 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4822 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4823 {
4824            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4825            rc = VINF_SUCCESS;
4826        }
4827        else
4828            rc = pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure ? VINF_SUCCESS : VERR_THREAD_IS_DEAD;
4829        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4830        RTThreadUserSignal(pDevExt->hTscDeltaThread);
4831        return rc;
4832 }
4833
4834 /*
4835     * If a TSC-delta measurement request is already being serviced by the thread,
4836     * wait up to cMsWaitThread for it to finish, otherwise bail out as busy.
4837 */
4838 msTsStartWait = RTTimeSystemMilliTS();
4839 for (iWaitLoop = 0;; iWaitLoop++)
4840 {
4841 uint64_t cMsElapsed;
4842 SUPDRVTSCDELTATHREADSTATE enmState;
4843 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4844 enmState = pDevExt->enmTscDeltaThreadState;
4845 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4846
4847 if (enmState == kTscDeltaThreadState_Measuring)
4848 { /* Must wait, the thread is busy. */ }
4849 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4850 { /* Must wait, this state only says what will happen next. */ }
4851 else if (enmState == kTscDeltaThreadState_Terminating)
4852 { /* Must wait, this state only says what should happen next. */ }
4853 else
4854 break; /* All other states, the thread is either idly listening or dead. */
4855
4856 /* Wait or fail. */
4857 if (cMsWaitThread == 0)
4858 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4859 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4860 if (cMsElapsed >= cMsWaitThread)
4861 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4862
4863 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4864 if (rc == VERR_INTERRUPTED)
4865 return rc;
4866 }
4867#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4868
4869 /*
4870 * Try measure the TSC delta the given number of times.
4871 */
4872 for (;;)
4873 {
4874 /* Unless we're forced to measure the delta, check whether it's done already. */
4875 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4876 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4877 {
4878 rc = VINF_SUCCESS;
4879 break;
4880 }
4881
4882 /* Measure it. */
4883 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
4884 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4885 {
4886 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4887 break;
4888 }
4889
4890 /* Retry? */
4891 if (cTries <= 1)
4892 break;
4893 cTries--;
4894
4895 /* Always delay between retries (be nice to the rest of the system
4896 and avoid the BSOD hounds). */
4897 rc = RTThreadSleep(cMsWaitRetry);
4898 if (rc == VERR_INTERRUPTED)
4899 break;
4900 }
4901
4902 return rc;
4903}
4904SUPR0_EXPORT_SYMBOL(SUPR0TscDeltaMeasureBySetIndex);
4905
4906
4907/**
4908 * Service a TSC-delta measurement request.
4909 *
4910 * @returns VBox status code.
4911 * @param pDevExt Pointer to the device instance data.
4912 * @param pSession The support driver session.
4913 * @param pReq Pointer to the TSC-delta measurement request.
4914 */
4915int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4916{
4917 uint32_t cTries;
4918 uint32_t iCpuSet;
4919 uint32_t fFlags;
4920 RTMSINTERVAL cMsWaitRetry;
4921 RT_NOREF1(pDevExt);
4922
4923 /*
4924     * Validate and adjust/resolve the input so it can be passed on to SUPR0TscDeltaMeasureBySetIndex.
4925 */
4926 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4927
4928 if (pReq->u.In.idCpu == NIL_RTCPUID)
4929 return VERR_INVALID_CPU_ID;
4930 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4931 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4932 return VERR_INVALID_CPU_ID;
4933
4934 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4935
4936 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4937
4938 fFlags = 0;
4939 if (pReq->u.In.fAsync)
4940 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4941 if (pReq->u.In.fForce)
4942 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4943
4944 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4945 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4946 cTries);
4947}
4948
4949
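/*
 * Worked illustration for the delta application below (illustration only):
 * with a worker delta of +500 ticks relative to the GIP master, a raw TSC
 * read of 1000500 on that CPU is adjusted to 1000500 - 500 = 1000000 on the
 * master's timeline.
 */
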
4950/**
4951 * Reads TSC with delta applied.
4952 *
4953 * Will try to resolve a delta value of INT64_MAX before applying it. This is
4954 * the main purpose of this function: handling the case where the delta still
4955 * needs to be determined.
4956 *
4957 * @returns VBox status code.
4958 * @param pDevExt Pointer to the device instance data.
4959 * @param pSession The support driver session.
4960 * @param pReq Pointer to the TSC-read request.
4961 */
4962int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4963{
4964 PSUPGLOBALINFOPAGE pGip;
4965 int rc;
4966
4967 /*
4968 * Validate. We require the client to have mapped GIP (no asserting on
4969 * ring-3 preconditions).
4970 */
4971 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4972 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4973 return VERR_WRONG_ORDER;
4974 pGip = pDevExt->pGip;
4975 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4976
4977 /*
4978     * We're usually here because we need to apply a delta, but we shouldn't be
4979     * upset if the GIP is in some different mode.
4980 */
4981 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4982 {
4983 uint32_t cTries = 0;
4984 for (;;)
4985 {
4986 /*
4987             * Start by gathering the data, disabling interrupts (CLI on x86) to
4988             * keep preemption at bay while we do that.
4989 */
4990 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4991 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4992            int iGipCpu = 0; /* gcc may be used uninitialized */
4993 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4994 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4995 {
4996 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4997 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4998 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4999 ASMSetFlags(fEFlags);
5000
5001 /*
5002 * If we're lucky we've got a delta, but no predictions here
5003 * as this I/O control is normally only used when the TSC delta
5004 * is set to INT64_MAX.
5005 */
5006 if (i64Delta != INT64_MAX)
5007 {
5008 pReq->u.Out.u64AdjustedTsc -= i64Delta;
5009 rc = VINF_SUCCESS;
5010 break;
5011 }
5012
5013 /* Give up after a few times. */
5014 if (cTries >= 4)
5015 {
5016 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
5017 break;
5018 }
5019
5020 /* Need to measure the delta an try again. */
5021 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
5022 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
5023 /** @todo should probably delay on failure... dpc watchdogs */
5024 }
5025 else
5026 {
5027 /* This really shouldn't happen. */
5028 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
5029 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5030 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5031 ASMSetFlags(fEFlags);
5032 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
5033 break;
5034 }
5035 }
5036 }
5037 else
5038 {
5039 /*
5040 * No delta to apply. Easy. Deal with preemption the lazy way.
5041 */
5042 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5043 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5044 int iGipCpu = 0; /* gcc may be used uninitialized */
5045 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5046 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5047 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5048 else
5049 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5050 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5051 ASMSetFlags(fEFlags);
5052 rc = VINF_SUCCESS;
5053 }
5054
5055 return rc;
5056}
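
/*
 * Illustrative sketch (not part of the driver): the delta-application rule
 * used by supdrvIOCtl_TscRead above.  INT64_MAX is the "delta not yet
 * measured" sentinel; any other value is subtracted from the raw TSC to move
 * it onto the master CPU's timeline.  The helper name is hypothetical.
 */
#if 0 /* example only, never compiled */
DECLINLINE(bool) supdrvExampleApplyTscDelta(int64_t i64Delta, uint64_t *pu64Tsc)
{
    if (i64Delta == INT64_MAX)
        return false;                   /* Unknown delta; caller must measure first. */
    *pu64Tsc -= (uint64_t)i64Delta;     /* Adjust onto the master's timeline. */
    return true;
}
#endif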


/**
 * Worker for supdrvIOCtl_GipSetFlags.
 *
 * @returns VBox status code.
 * @retval  VERR_WRONG_ORDER if an enable-once-per-session flag is set again
 *          for a session.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   fOrMask         The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask        The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 *
 * @remarks Caller must own the GIP mutex.
 *
 * @remarks This function doesn't validate any of the flags.
 */
static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    uint32_t           cRefs;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */

    /*
     * Compute GIP test-mode flags.
     */
    if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
    {
        if (!pSession->fGipTestMode)
        {
            Assert(pDevExt->cGipTestModeRefs < _64K);
            pSession->fGipTestMode = true;
            cRefs = ++pDevExt->cGipTestModeRefs;
            if (cRefs == 1)
            {
                fOrMask  |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
                fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
            }
        }
        else
        {
            LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
            return VERR_WRONG_ORDER;
        }
    }
    else if (   !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
             && pSession->fGipTestMode)
    {
        Assert(pDevExt->cGipTestModeRefs > 0);
        Assert(pDevExt->cGipTestModeRefs < _64K);
        pSession->fGipTestMode = false;
        cRefs = --pDevExt->cGipTestModeRefs;
        if (!cRefs)
            fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
        else
            fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
    }

    /*
     * Commit the flags.  This should be done as atomically as possible
     * since the flag consumers won't be holding the GIP mutex.
     */
    ASMAtomicOrU32(&pGip->fFlags, fOrMask);
    ASMAtomicAndU32(&pGip->fFlags, fAndMask);

    return VINF_SUCCESS;
}
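
/*
 * Illustrative sketch (not part of the driver): why the commit above uses
 * ASMAtomicOrU32/ASMAtomicAndU32.  Writers serialize on the GIP mutex, but a
 * consumer like the one sketched here reads fFlags with a single atomic load
 * and no lock, so the flags must never pass through a torn intermediate state.
 */
#if 0 /* example only, never compiled */
    uint32_t const fFlags = ASMAtomicReadU32(&pGip->fFlags);
    if (fFlags & SUPGIP_FLAGS_TESTING)
    {
        /* GIP test mode is active; interpret the TSC data accordingly. */
    }
#endif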


/**
 * Sets GIP test mode parameters.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   fOrMask         The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask        The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 */
int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    PSUPGLOBALINFOPAGE pGip;
    int                rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_3);

    if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;
    if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;

    /*
     * Don't confuse supdrvGipSetFlags or anyone else by both setting
     * and clearing the same flags.  AND takes precedence.
     */
    fOrMask &= fAndMask;

    /*
     * Take the GIP mutex to avoid having to think about races between two
     * clients changing the flags at the same time (state is not simple).
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif
    return rc;
}
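
/*
 * Illustrative sketch (not part of the driver): mask semantics for the
 * function above.  fOrMask must stay within SUPGIP_FLAGS_VALID_MASK, fAndMask
 * must keep all bits outside the valid mask set, and AND takes precedence for
 * bits present in both.  The enable/disable pairing below is an assumed usage.
 */
#if 0 /* example only, never compiled */
    /* Enable GIP test mode for this session: set the enable bit, clear nothing. */
    rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession,
                                 SUPGIP_FLAGS_TESTING_ENABLE /*fOrMask*/,
                                 UINT32_MAX /*fAndMask*/);

    /* Disable it again: set nothing, clear only the enable bit. */
    rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession,
                                 0 /*fOrMask*/,
                                 ~SUPGIP_FLAGS_TESTING_ENABLE /*fAndMask*/);
#endif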