1 | /* $Id: bs3-timing-1-32.c32 89153 2021-05-19 01:16:56Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * BS3Kit - bs3-timing-1, 32-bit C code.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2007-2021 Oracle Corporation
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
10 | * available from http://www.virtualbox.org. This file is free software;
|
---|
11 | * you can redistribute it and/or modify it under the terms of the GNU
|
---|
12 | * General Public License (GPL) as published by the Free Software
|
---|
13 | * Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
14 | * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
15 | * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
16 | *
|
---|
17 | * The contents of this file may alternatively be used under the terms
|
---|
18 | * of the Common Development and Distribution License Version 1.0
|
---|
19 | * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
20 | * VirtualBox OSE distribution, in which case the provisions of the
|
---|
21 | * CDDL are applicable instead of those of the GPL.
|
---|
22 | *
|
---|
23 | * You may elect to license modified versions of this file under the
|
---|
24 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
25 | */
|
---|
26 |
|
---|
27 |
|
---|
28 | /*********************************************************************************************************************************
|
---|
29 | * Header Files *
|
---|
30 | *********************************************************************************************************************************/
|
---|
31 | #ifndef STANDALONE_EXECUTABLE
|
---|
32 | # include <bs3kit.h>
|
---|
33 | #endif
|
---|
34 | #include <iprt/asm-amd64-x86.h>
|
---|
35 | #include <iprt/asm-math.h>
|
---|
36 | #include <iprt/asm.h>
|
---|
37 | #include <iprt/uint128.h>
|
---|
38 |
|
---|
39 |
|
---|
40 | /*********************************************************************************************************************************
|
---|
41 | * Structures and Typedefs *
|
---|
42 | *********************************************************************************************************************************/
|
---|
/**
 * Statistics gathered while sampling the TSC.
 *
 * One instance is filled in per timing run, and a file-level instance
 * accumulates the figures of all runs.
 */
typedef struct BS3TIMING1RESULT
{
    uint64_t cNsElapsed;        /**< Nanoseconds that elapsed while testing. */
    uint64_t cTicksElapsed;     /**< CPU ticks (TSC) that elapsed while testing. */
    uint64_t cTicksMin;         /**< Smallest tick delta seen between two TSC reads. */
    uint64_t cTicksMax;         /**< Largest tick delta seen between two TSC reads. */
    uint64_t cTicksSum;         /**< Sum of all the TSC read deltas. */
    uint64_t cTotalLoops;       /**< Number of loops, i.e. number of TSC read deltas. */
    uint64_t cBackwards;        /**< How many times the TSC was seen moving backwards. */
    /** Approximate log2(delta) histogram.  The slot is picked with
     *  ASMBitLastSetU64() on the delta; 65 slots, i.e. indexes 0 thru 64. */
    uint64_t aDistribution[65];
} BS3TIMING1RESULT;
|
---|
63 |
|
---|
64 |
|
---|
65 | /*********************************************************************************************************************************
|
---|
66 | * Global Variables *
|
---|
67 | *********************************************************************************************************************************/
|
---|
68 | /** The total result. */
|
---|
69 | static BS3TIMING1RESULT g_TotalResult;
|
---|
70 |
|
---|
71 | /** Set if history wrapped (i.e. table is full). */
|
---|
72 | static bool g_fBigHistoryWrapped = false;
|
---|
73 | /** The next history entry. */
|
---|
74 | static uint32_t g_iBigHistory;
|
---|
75 | /** History of large gaps. */
|
---|
76 | static struct { uint64_t uTsc, cTicksDelta; } g_aBigHistory[384];
|
---|
77 |
|
---|
78 |
|
---|
79 |
|
---|
80 | /**
|
---|
81 | * Pretty prints
|
---|
82 | */
|
---|
83 | static void bs3Timing1_PrintTicks(uint64_t cTicks, uint64_t uCpuFreq)
|
---|
84 | {
|
---|
85 | if (uCpuFreq > _128M)
|
---|
86 | {
|
---|
87 | if (cTicks >= uCpuFreq * 1000)
|
---|
88 | Bs3TestPrintf("%'RU64s", cTicks / uCpuFreq);
|
---|
89 | else
|
---|
90 | {
|
---|
91 | const char *pszUnit;
|
---|
92 | uint64_t uValue;
|
---|
93 | if (cTicks >= uCpuFreq)
|
---|
94 | {
|
---|
95 | pszUnit = "s";
|
---|
96 | uValue = (cTicks * RT_MS_1SEC) / uCpuFreq;
|
---|
97 | }
|
---|
98 | else if (cTicks * RT_MS_1SEC >= uCpuFreq)
|
---|
99 | {
|
---|
100 | pszUnit = "ms";
|
---|
101 | uValue = (cTicks * RT_US_1SEC) / uCpuFreq;
|
---|
102 | }
|
---|
103 | else if (cTicks * RT_US_1SEC >= uCpuFreq)
|
---|
104 | {
|
---|
105 | pszUnit = "us";
|
---|
106 | uValue = (cTicks * RT_NS_1SEC) / uCpuFreq;
|
---|
107 | }
|
---|
108 | else if (cTicks * RT_NS_1SEC >= uCpuFreq)
|
---|
109 | {
|
---|
110 | pszUnit = "ns";
|
---|
111 | uValue = (cTicks * UINT64_C(1000000000000)) / uCpuFreq;
|
---|
112 | }
|
---|
113 | else
|
---|
114 | {
|
---|
115 | Bs3TestPrintf("%'RU64ps", (cTicks * UINT64_C(1000000000000)) / uCpuFreq);
|
---|
116 | return;
|
---|
117 | }
|
---|
118 | Bs3TestPrintf("%u.%03u%s (%'RU64 ticks)", (uint32_t)uValue / 1000, (uint32_t)uValue % 1000, pszUnit, cTicks);
|
---|
119 | }
|
---|
120 | }
|
---|
121 | else
|
---|
122 | Bs3TestPrintf("%'RU64 ticks", cTicks);
|
---|
123 | }
|
---|
124 |
|
---|
125 |
|
---|
126 | /**
|
---|
127 | * Prints a result.
|
---|
128 | *
|
---|
129 | * @param pResult The result to print.
|
---|
130 | * @param iRun The run (loop in qpc parlance).
|
---|
131 | * @param uVerbosity The verbosity level.
|
---|
132 | */
|
---|
133 | static void bs3Timing1_PrintResult(BS3TIMING1RESULT const *pResult, unsigned iRun, unsigned uVerbosity)
|
---|
134 | {
|
---|
135 | uint64_t uCpuFreq;
|
---|
136 |
|
---|
137 | /*
|
---|
138 | * Calc CPU frequency.
|
---|
139 | */
|
---|
140 | if (pResult->cNsElapsed > 0 && pResult->cTicksElapsed > 0)
|
---|
141 | {
|
---|
142 | #if 1
|
---|
143 | RTUINT128U Tmp1, Divisor, Result;
|
---|
144 | RTUInt128Div(&Result,
|
---|
145 | RTUInt128MulU64ByU64(&Tmp1, pResult->cTicksElapsed, RT_NS_1SEC),
|
---|
146 | RTUInt128AssignU64(&Divisor, pResult->cNsElapsed));
|
---|
147 | uCpuFreq = Result.s.Lo;
|
---|
148 | #else
|
---|
149 | unsigned const cShift = pResult->cTicksElapsed < UINT64_C(0x000225C17D04) ? 0
|
---|
150 | : pResult->cTicksElapsed < UINT64_C(0x00225C17D04D) ? 4
|
---|
151 | : pResult->cTicksElapsed < UINT64_C(0x0225C17D04DA) ? 8
|
---|
152 | : pResult->cTicksElapsed < UINT64_C(0x225C1D940BF6) ? 12
|
---|
153 | : 16;
|
---|
154 | uCpuFreq = pResult->cTicksElapsed * ((uint64_t)RT_NS_1SEC >> cShift) / (pResult->cNsElapsed >> cShift);
|
---|
155 | #endif
|
---|
156 | }
|
---|
157 | else
|
---|
158 | uCpuFreq = 1;
|
---|
159 |
|
---|
160 | /*
|
---|
161 | * Report results.
|
---|
162 | *
|
---|
163 | * Note! in 32-bit and 16-bit mode, values 4G or higher gets formatted as
|
---|
164 | * hexadecimal to avoid 64-bit division.
|
---|
165 | */
|
---|
166 | Bs3TestPrintf("Loop #%u: %'RU64 tests: ", iRun, pResult->cTotalLoops);
|
---|
167 |
|
---|
168 | Bs3TestPrintf("average ");
|
---|
169 | bs3Timing1_PrintTicks(pResult->cTicksSum / pResult->cTotalLoops, uCpuFreq);
|
---|
170 | Bs3TestPrintf(", min ");
|
---|
171 | bs3Timing1_PrintTicks(pResult->cTicksMin, uCpuFreq);
|
---|
172 | Bs3TestPrintf(", max ");
|
---|
173 | bs3Timing1_PrintTicks(pResult->cTicksMax, uCpuFreq);
|
---|
174 | Bs3TestPrintf("\n");
|
---|
175 |
|
---|
176 | /* Distribution (tick delta log2-ish). */
|
---|
177 | if (uVerbosity > 0)
|
---|
178 | {
|
---|
179 | unsigned iItem = 0;
|
---|
180 | unsigned i;
|
---|
181 | for (i = uVerbosity > 1 ? 0 : 5; i < RT_ELEMENTS(pResult->aDistribution); i++)
|
---|
182 | if (pResult->aDistribution[i] != 0)
|
---|
183 | {
|
---|
184 | if (iItem >= 6)
|
---|
185 | {
|
---|
186 | iItem = 0;
|
---|
187 | Bs3TestPrintf("\n");
|
---|
188 | }
|
---|
189 | iItem++;
|
---|
190 | Bs3TestPrintf(" %'11RU64|2^%-2u", pResult->aDistribution[i], i);
|
---|
191 | }
|
---|
192 | if (uVerbosity > 1)
|
---|
193 | Bs3TestPrintf(iItem < 6 ? " (%'RU64 Hz)\n" : "\n (%'RU64 Hz)\n", uCpuFreq);
|
---|
194 | else
|
---|
195 | Bs3TestPrintf("\n");
|
---|
196 | }
|
---|
197 | if (pResult->cBackwards != 0)
|
---|
198 | Bs3TestFailedF("TSC went backwards %'RU64 time(s)", pResult->cBackwards);
|
---|
199 | }
|
---|
200 |
|
---|
201 |
|
---|
202 | /**
|
---|
203 | * Do one TSC timing iteration.
|
---|
204 | *
|
---|
205 | * @param iRun The iteration number (loop).
|
---|
206 | * @param cSecs The number of seconds to sample TSCs.
|
---|
207 | * @param uVerbosity The noise level.
|
---|
208 | * @param iMinHistory The threshold level to put stuff in g_auTscHistory.
|
---|
209 | */
|
---|
210 | static void bs3Timing1_Tsc_One(unsigned iRun, uint32_t cSecs, unsigned uVerbosity, unsigned iMinHistory)
|
---|
211 | {
|
---|
212 | uint64_t const nsStart = Bs3TestNow();
|
---|
213 | uint64_t const uTscStart = ASMReadTSC();
|
---|
214 | uint64_t const nsDeadline = nsStart + cSecs * RT_NS_1SEC_64;
|
---|
215 | uint64_t cNsElapsed;
|
---|
216 | BS3TIMING1RESULT Result;
|
---|
217 | unsigned i;
|
---|
218 |
|
---|
219 | Bs3MemZero(&Result, sizeof(Result));
|
---|
220 | Result.cTicksMin = UINT64_MAX;
|
---|
221 |
|
---|
222 | /*
|
---|
223 | * Test loop.
|
---|
224 | */
|
---|
225 | do
|
---|
226 | {
|
---|
227 | unsigned cLoops = 100000 + 1;
|
---|
228 | Result.cTotalLoops += cLoops - 1;
|
---|
229 | while (--cLoops != 0)
|
---|
230 | {
|
---|
231 | uint64_t uTscPrev = ASMReadTSC();
|
---|
232 | uint64_t uTscNow = ASMReadTSC();
|
---|
233 | uint64_t cTicks = uTscNow - uTscPrev;
|
---|
234 | unsigned iBit;
|
---|
235 |
|
---|
236 | /* check that it doesn't go backwards*/
|
---|
237 | if ((int64_t)cTicks < 0)
|
---|
238 | Result.cBackwards++;
|
---|
239 |
|
---|
240 | /* min/max/avg */
|
---|
241 | Result.cTicksSum += cTicks;
|
---|
242 | if (cTicks < Result.cTicksMin)
|
---|
243 | Result.cTicksMin = cTicks;
|
---|
244 | if (cTicks > Result.cTicksMax)
|
---|
245 | Result.cTicksMax = cTicks;
|
---|
246 |
|
---|
247 | /* result distribution by most significant bit. */
|
---|
248 | iBit = ASMBitLastSetU64(cTicks);
|
---|
249 | Result.aDistribution[iBit] += 1;
|
---|
250 | if (iBit < iMinHistory)
|
---|
251 | { /* likely */ }
|
---|
252 | else
|
---|
253 | {
|
---|
254 | g_aBigHistory[g_iBigHistory].uTsc = uTscPrev;
|
---|
255 | g_aBigHistory[g_iBigHistory].cTicksDelta = cTicks;
|
---|
256 | if (++g_iBigHistory >= RT_ELEMENTS(g_aBigHistory))
|
---|
257 | {
|
---|
258 | g_iBigHistory = 0;
|
---|
259 | g_fBigHistoryWrapped = true;
|
---|
260 | }
|
---|
261 | }
|
---|
262 | }
|
---|
263 | } while ((cNsElapsed = Bs3TestNow()) < nsDeadline);
|
---|
264 |
|
---|
265 | Result.cTicksElapsed = ASMReadTSC() - uTscStart;
|
---|
266 | Result.cNsElapsed = cNsElapsed - nsStart;
|
---|
267 |
|
---|
268 | bs3Timing1_PrintResult(&Result, iRun, uVerbosity);
|
---|
269 |
|
---|
270 | /* Add to total. */
|
---|
271 | g_TotalResult.cNsElapsed += Result.cNsElapsed;
|
---|
272 | g_TotalResult.cTicksElapsed += Result.cTicksElapsed;
|
---|
273 | if (Result.cTicksMin < g_TotalResult.cTicksMin || g_TotalResult.cTicksMin == 0)
|
---|
274 | g_TotalResult.cTicksMin = Result.cTicksMin;
|
---|
275 | if (Result.cTicksMax > g_TotalResult.cTicksMax)
|
---|
276 | g_TotalResult.cTicksMax += Result.cTicksMax;
|
---|
277 | g_TotalResult.cTicksSum += Result.cTicksSum;
|
---|
278 | g_TotalResult.cTotalLoops += Result.cTotalLoops;
|
---|
279 | g_TotalResult.cBackwards += Result.cBackwards;
|
---|
280 | for (i = 0; i < RT_ELEMENTS(Result.aDistribution); i++)
|
---|
281 | g_TotalResult.aDistribution[i] += Result.aDistribution[i];
|
---|
282 | }
|
---|
283 |
|
---|
284 |
|
---|
285 | /**
|
---|
286 | * The TSC test driver.
|
---|
287 | *
|
---|
288 | * @param cLoops Number of test iterations.
|
---|
289 | * @param cSecs The number of seconds per iteration.
|
---|
290 | * @param uVerbosity How noisy we should be.
|
---|
291 | * @param iMinHistory The threshold for big gap history.
|
---|
292 | */
|
---|
293 | static void bs3Timing1_Tsc_Driver(unsigned cLoops, unsigned cSecs, unsigned uVerbosity, unsigned iMinHistory)
|
---|
294 | {
|
---|
295 | unsigned iLoop;
|
---|
296 |
|
---|
297 | #if 1
|
---|
298 | /*
|
---|
299 | * Verify that the first/last bit in U64 works (didn't).
|
---|
300 | */
|
---|
301 | iLoop = ASMBitLastSetU64( UINT64_C(0x1000100010001000)); if (iLoop != 61) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
302 | iLoop = ASMBitFirstSetU64(UINT64_C(0x1000100010001000)); if (iLoop != 13) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
303 | iLoop = ASMBitLastSetU64( UINT64_C(0x000ffff000000000)); if (iLoop != 52) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
304 | iLoop = ASMBitFirstSetU64(UINT64_C(0x000ffff000000000)); if (iLoop != 37) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
305 | #endif
|
---|
306 |
|
---|
307 | /*
|
---|
308 | * Do the work.
|
---|
309 | */
|
---|
310 | Bs3TestPrintf("Running %u loops, %u second%s each...\n", cLoops, cSecs, cSecs != 1 ? "s" : "");
|
---|
311 | for (iLoop = 1; iLoop <= cLoops; iLoop++)
|
---|
312 | bs3Timing1_Tsc_One(iLoop, cSecs, uVerbosity, iMinHistory);
|
---|
313 |
|
---|
314 | /*
|
---|
315 | * Report the total.
|
---|
316 | */
|
---|
317 | Bs3TestPrintf("Total:\n");
|
---|
318 | bs3Timing1_PrintResult(&g_TotalResult, iLoop, uVerbosity + 1);
|
---|
319 |
|
---|
320 | /*
|
---|
321 | * Dump the large gap history, if any.
|
---|
322 | */
|
---|
323 | if (g_fBigHistoryWrapped || g_iBigHistory > 0)
|
---|
324 | {
|
---|
325 | uint32_t const iFirst = g_fBigHistoryWrapped ? g_iBigHistory : 0;
|
---|
326 | uint32_t const iEnd = g_iBigHistory;
|
---|
327 | uint64_t uTscPrev = g_aBigHistory[iFirst].uTsc;
|
---|
328 | uint32_t i = iFirst;
|
---|
329 | Bs3TestPrintf("Big gap history (TSC, prev delta, test delta|level):\n");
|
---|
330 | do
|
---|
331 | {
|
---|
332 | Bs3TestPrintf(" %'RU64: %'14RU64 - %'14RU64|%u\n", g_aBigHistory[i].uTsc, g_aBigHistory[i].uTsc - uTscPrev,
|
---|
333 | g_aBigHistory[i].cTicksDelta, ASMBitLastSetU64(g_aBigHistory[i].cTicksDelta));
|
---|
334 | uTscPrev = g_aBigHistory[i].uTsc;
|
---|
335 | if (++i >= RT_ELEMENTS(g_aBigHistory))
|
---|
336 | i = 0;
|
---|
337 | } while (i != iEnd);
|
---|
338 | }
|
---|
339 | else
|
---|
340 | Bs3TestPrintf("No big gap history.\n");
|
---|
341 | }
|
---|
342 |
|
---|
343 |
|
---|
344 | #ifndef STANDALONE_EXECUTABLE
|
---|
345 | BS3_DECL(void) bs3Timing1_Tsc_pe32(void)
|
---|
346 | {
|
---|
347 | Bs3TestPrintf("bs3Timing1_Tsc_pe32\n");
|
---|
348 | bs3Timing1_Tsc_Driver(60, 10 /*sec*/, 1 /*uVerbosity*/, 17);
|
---|
349 | }
|
---|
350 | #endif
|
---|
351 |
|
---|
352 | /* P.S. don't forget: VBoxManage setextradata bs3-timing-1 VBoxInternal/Devices/VMMDev/0/Config/TestingEnabled 1 */
|
---|
353 |
|
---|