1 | /* $Id: bs3-timing-1-32.c32 106061 2024-09-16 14:03:52Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * BS3Kit - bs3-timing-1, 32-bit C code.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2007-2024 Oracle and/or its affiliates.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox base platform packages, as
|
---|
10 | * available from https://www.virtualbox.org.
|
---|
11 | *
|
---|
12 | * This program is free software; you can redistribute it and/or
|
---|
13 | * modify it under the terms of the GNU General Public License
|
---|
14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
15 | * License.
|
---|
16 | *
|
---|
17 | * This program is distributed in the hope that it will be useful, but
|
---|
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | * General Public License for more details.
|
---|
21 | *
|
---|
22 | * You should have received a copy of the GNU General Public License
|
---|
23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | *
|
---|
25 | * The contents of this file may alternatively be used under the terms
|
---|
26 | * of the Common Development and Distribution License Version 1.0
|
---|
27 | * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
28 | * in the VirtualBox distribution, in which case the provisions of the
|
---|
29 | * CDDL are applicable instead of those of the GPL.
|
---|
30 | *
|
---|
31 | * You may elect to license modified versions of this file under the
|
---|
32 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
33 | *
|
---|
34 | * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
35 | */
|
---|
36 |
|
---|
37 |
|
---|
38 | /*********************************************************************************************************************************
|
---|
39 | * Header Files *
|
---|
40 | *********************************************************************************************************************************/
|
---|
41 | #ifndef STANDALONE_EXECUTABLE
|
---|
42 | # include <bs3kit.h>
|
---|
43 | #endif
|
---|
44 | #if defined(RT_ARcH_ARM) || defined(RT_ARCH_ARM64)
|
---|
45 | # include <iprt/asm-arm.h>
|
---|
46 | #else
|
---|
47 | # include <iprt/asm-amd64-x86.h>
|
---|
48 | #endif
|
---|
49 | #include <iprt/asm-math.h>
|
---|
50 | #include <iprt/asm.h>
|
---|
51 | #include <iprt/uint128.h>
|
---|
52 |
|
---|
53 |
|
---|
54 | /*********************************************************************************************************************************
|
---|
55 | * Structures and Typedefs *
|
---|
56 | *********************************************************************************************************************************/
|
---|
/**
 * TSC timing results.
 *
 * Holds the statistics gathered while repeatedly reading the TSC, either for
 * a single run or accumulated over several runs.
 */
struct BS3TIMING1RESULT
{
    /** Nanoseconds that elapsed while testing. */
    uint64_t cNsElapsed;
    /** CPU ticks that elapsed while testing. */
    uint64_t cTicksElapsed;
    /** Smallest tick delta seen between two TSC reads. */
    uint64_t cTicksMin;
    /** Largest tick delta seen between two TSC reads. */
    uint64_t cTicksMax;
    /** Sum of all the TSC read deltas. */
    uint64_t cTicksSum;
    /** Number of loops, i.e. the number of TSC read deltas. */
    uint64_t cTotalLoops;
    /** Number of times the TSC was seen moving backwards. */
    uint64_t cBackwards;
    /** Approximate log2(cTicks) distribution of the deltas. */
    uint64_t aDistribution[65];
};
/** TSC timing results (see struct BS3TIMING1RESULT). */
typedef struct BS3TIMING1RESULT BS3TIMING1RESULT;
|
---|
77 |
|
---|
78 |
|
---|
79 | /*********************************************************************************************************************************
|
---|
80 | * Global Variables *
|
---|
81 | *********************************************************************************************************************************/
|
---|
82 | /** The total result. */
|
---|
83 | static BS3TIMING1RESULT g_TotalResult;
|
---|
84 |
|
---|
85 | /** Set if history wrapped (i.e. table is full). */
|
---|
86 | static bool g_fBigHistoryWrapped = false;
|
---|
87 | /** The next history entry. */
|
---|
88 | static uint32_t g_iBigHistory;
|
---|
89 | /** History of large gaps. */
|
---|
90 | static struct { uint64_t uTsc, cTicksDelta; } g_aBigHistory[384];
|
---|
91 |
|
---|
92 |
|
---|
93 |
|
---|
94 | /**
|
---|
95 | * Pretty prints
|
---|
96 | */
|
---|
97 | static void bs3Timing1_PrintTicks(uint64_t cTicks, uint64_t uCpuFreq)
|
---|
98 | {
|
---|
99 | if (uCpuFreq > _128M)
|
---|
100 | {
|
---|
101 | if (cTicks >= uCpuFreq * 1000)
|
---|
102 | Bs3TestPrintf("%'RU64s", cTicks / uCpuFreq);
|
---|
103 | else
|
---|
104 | {
|
---|
105 | const char *pszUnit;
|
---|
106 | uint64_t uValue;
|
---|
107 | if (cTicks >= uCpuFreq)
|
---|
108 | {
|
---|
109 | pszUnit = "s";
|
---|
110 | uValue = (cTicks * RT_MS_1SEC) / uCpuFreq;
|
---|
111 | }
|
---|
112 | else if (cTicks * RT_MS_1SEC >= uCpuFreq)
|
---|
113 | {
|
---|
114 | pszUnit = "ms";
|
---|
115 | uValue = (cTicks * RT_US_1SEC) / uCpuFreq;
|
---|
116 | }
|
---|
117 | else if (cTicks * RT_US_1SEC >= uCpuFreq)
|
---|
118 | {
|
---|
119 | pszUnit = "us";
|
---|
120 | uValue = (cTicks * RT_NS_1SEC) / uCpuFreq;
|
---|
121 | }
|
---|
122 | else if (cTicks * RT_NS_1SEC >= uCpuFreq)
|
---|
123 | {
|
---|
124 | pszUnit = "ns";
|
---|
125 | uValue = (cTicks * UINT64_C(1000000000000)) / uCpuFreq;
|
---|
126 | }
|
---|
127 | else
|
---|
128 | {
|
---|
129 | Bs3TestPrintf("%'RU64ps", (cTicks * UINT64_C(1000000000000)) / uCpuFreq);
|
---|
130 | return;
|
---|
131 | }
|
---|
132 | Bs3TestPrintf("%u.%03u%s (%'RU64 ticks)", (uint32_t)uValue / 1000, (uint32_t)uValue % 1000, pszUnit, cTicks);
|
---|
133 | }
|
---|
134 | }
|
---|
135 | else
|
---|
136 | Bs3TestPrintf("%'RU64 ticks", cTicks);
|
---|
137 | }
|
---|
138 |
|
---|
139 |
|
---|
140 | /**
|
---|
141 | * Prints a result.
|
---|
142 | *
|
---|
143 | * @param pResult The result to print.
|
---|
144 | * @param iRun The run (loop in qpc parlance).
|
---|
145 | * @param uVerbosity The verbosity level.
|
---|
146 | */
|
---|
147 | static void bs3Timing1_PrintResult(BS3TIMING1RESULT const *pResult, unsigned iRun, unsigned uVerbosity)
|
---|
148 | {
|
---|
149 | uint64_t uCpuFreq;
|
---|
150 |
|
---|
151 | /*
|
---|
152 | * Calc CPU frequency.
|
---|
153 | */
|
---|
154 | if (pResult->cNsElapsed > 0 && pResult->cTicksElapsed > 0)
|
---|
155 | {
|
---|
156 | #if 1
|
---|
157 | RTUINT128U Tmp1, Divisor, Result;
|
---|
158 | RTUInt128Div(&Result,
|
---|
159 | RTUInt128MulU64ByU64(&Tmp1, pResult->cTicksElapsed, RT_NS_1SEC),
|
---|
160 | RTUInt128AssignU64(&Divisor, pResult->cNsElapsed));
|
---|
161 | uCpuFreq = Result.s.Lo;
|
---|
162 | #else
|
---|
163 | unsigned const cShift = pResult->cTicksElapsed < UINT64_C(0x000225C17D04) ? 0
|
---|
164 | : pResult->cTicksElapsed < UINT64_C(0x00225C17D04D) ? 4
|
---|
165 | : pResult->cTicksElapsed < UINT64_C(0x0225C17D04DA) ? 8
|
---|
166 | : pResult->cTicksElapsed < UINT64_C(0x225C1D940BF6) ? 12
|
---|
167 | : 16;
|
---|
168 | uCpuFreq = pResult->cTicksElapsed * ((uint64_t)RT_NS_1SEC >> cShift) / (pResult->cNsElapsed >> cShift);
|
---|
169 | #endif
|
---|
170 | }
|
---|
171 | else
|
---|
172 | uCpuFreq = 1;
|
---|
173 |
|
---|
174 | /*
|
---|
175 | * Report results.
|
---|
176 | *
|
---|
177 | * Note! in 32-bit and 16-bit mode, values 4G or higher gets formatted as
|
---|
178 | * hexadecimal to avoid 64-bit division.
|
---|
179 | */
|
---|
180 | Bs3TestPrintf("Loop #%u: %'RU64 tests: ", iRun, pResult->cTotalLoops);
|
---|
181 |
|
---|
182 | Bs3TestPrintf("average ");
|
---|
183 | bs3Timing1_PrintTicks(pResult->cTicksSum / pResult->cTotalLoops, uCpuFreq);
|
---|
184 | Bs3TestPrintf(", min ");
|
---|
185 | bs3Timing1_PrintTicks(pResult->cTicksMin, uCpuFreq);
|
---|
186 | Bs3TestPrintf(", max ");
|
---|
187 | bs3Timing1_PrintTicks(pResult->cTicksMax, uCpuFreq);
|
---|
188 | Bs3TestPrintf("\n");
|
---|
189 |
|
---|
190 | /* Distribution (tick delta log2-ish). */
|
---|
191 | if (uVerbosity > 0)
|
---|
192 | {
|
---|
193 | unsigned iItem = 0;
|
---|
194 | unsigned i;
|
---|
195 | for (i = uVerbosity > 1 ? 0 : 5; i < RT_ELEMENTS(pResult->aDistribution); i++)
|
---|
196 | if (pResult->aDistribution[i] != 0)
|
---|
197 | {
|
---|
198 | if (iItem >= 6)
|
---|
199 | {
|
---|
200 | iItem = 0;
|
---|
201 | Bs3TestPrintf("\n");
|
---|
202 | }
|
---|
203 | iItem++;
|
---|
204 | Bs3TestPrintf(" %'11RU64|2^%-2u", pResult->aDistribution[i], i);
|
---|
205 | }
|
---|
206 | if (uVerbosity > 1)
|
---|
207 | Bs3TestPrintf(iItem < 6 ? " (%'RU64 Hz)\n" : "\n (%'RU64 Hz)\n", uCpuFreq);
|
---|
208 | else
|
---|
209 | Bs3TestPrintf("\n");
|
---|
210 | }
|
---|
211 | if (pResult->cBackwards != 0)
|
---|
212 | Bs3TestFailedF("TSC went backwards %'RU64 time(s)", pResult->cBackwards);
|
---|
213 | }
|
---|
214 |
|
---|
215 |
|
---|
216 | /**
|
---|
217 | * Do one TSC timing iteration.
|
---|
218 | *
|
---|
219 | * @param iRun The iteration number (loop).
|
---|
220 | * @param cSecs The number of seconds to sample TSCs.
|
---|
221 | * @param uVerbosity The noise level.
|
---|
222 | * @param iMinHistory The threshold level to put stuff in g_auTscHistory.
|
---|
223 | */
|
---|
224 | static void bs3Timing1_Tsc_One(unsigned iRun, uint32_t cSecs, unsigned uVerbosity, unsigned iMinHistory)
|
---|
225 | {
|
---|
226 | uint64_t const nsStart = Bs3TestNow();
|
---|
227 | uint64_t const uTscStart = ASMReadTSC();
|
---|
228 | uint64_t const nsDeadline = nsStart + cSecs * RT_NS_1SEC_64;
|
---|
229 | uint64_t cNsElapsed;
|
---|
230 | BS3TIMING1RESULT Result;
|
---|
231 | unsigned i;
|
---|
232 |
|
---|
233 | Bs3MemZero(&Result, sizeof(Result));
|
---|
234 | Result.cTicksMin = UINT64_MAX;
|
---|
235 |
|
---|
236 | /*
|
---|
237 | * Test loop.
|
---|
238 | */
|
---|
239 | do
|
---|
240 | {
|
---|
241 | unsigned cLoops = 100000 + 1;
|
---|
242 | Result.cTotalLoops += cLoops - 1;
|
---|
243 | while (--cLoops != 0)
|
---|
244 | {
|
---|
245 | uint64_t uTscPrev = ASMReadTSC();
|
---|
246 | uint64_t uTscNow = ASMReadTSC();
|
---|
247 | uint64_t cTicks = uTscNow - uTscPrev;
|
---|
248 | unsigned iBit;
|
---|
249 |
|
---|
250 | /* check that it doesn't go backwards*/
|
---|
251 | if ((int64_t)cTicks < 0)
|
---|
252 | Result.cBackwards++;
|
---|
253 |
|
---|
254 | /* min/max/avg */
|
---|
255 | Result.cTicksSum += cTicks;
|
---|
256 | if (cTicks < Result.cTicksMin)
|
---|
257 | Result.cTicksMin = cTicks;
|
---|
258 | if (cTicks > Result.cTicksMax)
|
---|
259 | Result.cTicksMax = cTicks;
|
---|
260 |
|
---|
261 | /* result distribution by most significant bit. */
|
---|
262 | iBit = ASMBitLastSetU64(cTicks);
|
---|
263 | Result.aDistribution[iBit] += 1;
|
---|
264 | if (iBit < iMinHistory)
|
---|
265 | { /* likely */ }
|
---|
266 | else
|
---|
267 | {
|
---|
268 | g_aBigHistory[g_iBigHistory].uTsc = uTscPrev;
|
---|
269 | g_aBigHistory[g_iBigHistory].cTicksDelta = cTicks;
|
---|
270 | if (++g_iBigHistory >= RT_ELEMENTS(g_aBigHistory))
|
---|
271 | {
|
---|
272 | g_iBigHistory = 0;
|
---|
273 | g_fBigHistoryWrapped = true;
|
---|
274 | }
|
---|
275 | }
|
---|
276 | }
|
---|
277 | } while ((cNsElapsed = Bs3TestNow()) < nsDeadline);
|
---|
278 |
|
---|
279 | Result.cTicksElapsed = ASMReadTSC() - uTscStart;
|
---|
280 | Result.cNsElapsed = cNsElapsed - nsStart;
|
---|
281 |
|
---|
282 | bs3Timing1_PrintResult(&Result, iRun, uVerbosity);
|
---|
283 |
|
---|
284 | /* Add to total. */
|
---|
285 | g_TotalResult.cNsElapsed += Result.cNsElapsed;
|
---|
286 | g_TotalResult.cTicksElapsed += Result.cTicksElapsed;
|
---|
287 | if (Result.cTicksMin < g_TotalResult.cTicksMin || g_TotalResult.cTicksMin == 0)
|
---|
288 | g_TotalResult.cTicksMin = Result.cTicksMin;
|
---|
289 | if (Result.cTicksMax > g_TotalResult.cTicksMax)
|
---|
290 | g_TotalResult.cTicksMax += Result.cTicksMax;
|
---|
291 | g_TotalResult.cTicksSum += Result.cTicksSum;
|
---|
292 | g_TotalResult.cTotalLoops += Result.cTotalLoops;
|
---|
293 | g_TotalResult.cBackwards += Result.cBackwards;
|
---|
294 | for (i = 0; i < RT_ELEMENTS(Result.aDistribution); i++)
|
---|
295 | g_TotalResult.aDistribution[i] += Result.aDistribution[i];
|
---|
296 | }
|
---|
297 |
|
---|
298 |
|
---|
299 | /**
|
---|
300 | * The TSC test driver.
|
---|
301 | *
|
---|
302 | * @param cLoops Number of test iterations.
|
---|
303 | * @param cSecs The number of seconds per iteration.
|
---|
304 | * @param uVerbosity How noisy we should be.
|
---|
305 | * @param iMinHistory The threshold for big gap history.
|
---|
306 | */
|
---|
307 | static void bs3Timing1_Tsc_Driver(unsigned cLoops, unsigned cSecs, unsigned uVerbosity, unsigned iMinHistory)
|
---|
308 | {
|
---|
309 | unsigned iLoop;
|
---|
310 |
|
---|
311 | #if 1
|
---|
312 | /*
|
---|
313 | * Verify that the first/last bit in U64 works (didn't).
|
---|
314 | */
|
---|
315 | iLoop = ASMBitLastSetU64( UINT64_C(0x1000100010001000)); if (iLoop != 61) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
316 | iLoop = ASMBitFirstSetU64(UINT64_C(0x1000100010001000)); if (iLoop != 13) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
317 | iLoop = ASMBitLastSetU64( UINT64_C(0x000ffff000000000)); if (iLoop != 52) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
318 | iLoop = ASMBitFirstSetU64(UINT64_C(0x000ffff000000000)); if (iLoop != 37) Bs3TestFailedF("%d: iLoop=%d\n", __LINE__, iLoop);
|
---|
319 | #endif
|
---|
320 |
|
---|
321 | /*
|
---|
322 | * Do the work.
|
---|
323 | */
|
---|
324 | Bs3TestPrintf("Running %u loops, %u second%s each...\n", cLoops, cSecs, cSecs != 1 ? "s" : "");
|
---|
325 | for (iLoop = 1; iLoop <= cLoops; iLoop++)
|
---|
326 | bs3Timing1_Tsc_One(iLoop, cSecs, uVerbosity, iMinHistory);
|
---|
327 |
|
---|
328 | /*
|
---|
329 | * Report the total.
|
---|
330 | */
|
---|
331 | Bs3TestPrintf("Total:\n");
|
---|
332 | bs3Timing1_PrintResult(&g_TotalResult, iLoop, uVerbosity + 1);
|
---|
333 |
|
---|
334 | /*
|
---|
335 | * Dump the large gap history, if any.
|
---|
336 | */
|
---|
337 | if (g_fBigHistoryWrapped || g_iBigHistory > 0)
|
---|
338 | {
|
---|
339 | uint32_t const iFirst = g_fBigHistoryWrapped ? g_iBigHistory : 0;
|
---|
340 | uint32_t const iEnd = g_iBigHistory;
|
---|
341 | uint64_t uTscPrev = g_aBigHistory[iFirst].uTsc;
|
---|
342 | uint32_t i = iFirst;
|
---|
343 | Bs3TestPrintf("Big gap history (TSC, prev delta, test delta|level):\n");
|
---|
344 | do
|
---|
345 | {
|
---|
346 | Bs3TestPrintf(" %'RU64: %'14RU64 - %'14RU64|%u\n", g_aBigHistory[i].uTsc, g_aBigHistory[i].uTsc - uTscPrev,
|
---|
347 | g_aBigHistory[i].cTicksDelta, ASMBitLastSetU64(g_aBigHistory[i].cTicksDelta));
|
---|
348 | uTscPrev = g_aBigHistory[i].uTsc;
|
---|
349 | if (++i >= RT_ELEMENTS(g_aBigHistory))
|
---|
350 | i = 0;
|
---|
351 | } while (i != iEnd);
|
---|
352 | }
|
---|
353 | else
|
---|
354 | Bs3TestPrintf("No big gap history.\n");
|
---|
355 | }
|
---|
356 |
|
---|
357 |
|
---|
358 | #ifndef STANDALONE_EXECUTABLE
|
---|
359 | BS3_DECL(void) bs3Timing1_Tsc_pe32(void)
|
---|
360 | {
|
---|
361 | Bs3TestPrintf("bs3Timing1_Tsc_pe32\n");
|
---|
362 | bs3Timing1_Tsc_Driver(60, 10 /*sec*/, 1 /*uVerbosity*/, 17);
|
---|
363 | }
|
---|
364 | #endif
|
---|
365 |
|
---|
366 | /* P.S. don't forget: VBoxManage setextradata bs3-timing-1 VBoxInternal/Devices/VMMDev/0/Config/TestingEnabled 1 */
|
---|
367 |
|
---|