1 | /* $Id: bs3-memalloc-1.c64 93603 2022-02-04 14:11:42Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * BS3Kit - bs3-memalloc-1, 64-bit C code.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2021-2022 Oracle Corporation
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
10 | * available from http://www.virtualbox.org. This file is free software;
|
---|
11 | * you can redistribute it and/or modify it under the terms of the GNU
|
---|
12 | * General Public License (GPL) as published by the Free Software
|
---|
13 | * Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
14 | * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
15 | * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
16 | *
|
---|
17 | * The contents of this file may alternatively be used under the terms
|
---|
18 | * of the Common Development and Distribution License Version 1.0
|
---|
19 | * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
20 | * VirtualBox OSE distribution, in which case the provisions of the
|
---|
21 | * CDDL are applicable instead of those of the GPL.
|
---|
22 | *
|
---|
23 | * You may elect to license modified versions of this file under the
|
---|
24 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
25 | */
|
---|
26 |
|
---|
27 |
|
---|
28 | /*********************************************************************************************************************************
|
---|
29 | * Header Files *
|
---|
30 | *********************************************************************************************************************************/
|
---|
31 | #include <bs3kit.h>
|
---|
32 | #include <iprt/asm-amd64-x86.h>
|
---|
33 | #include <VBox/VMMDevTesting.h>
|
---|
34 |
|
---|
35 |
|
---|
36 | /*********************************************************************************************************************************
|
---|
37 | * Global Variables *
|
---|
38 | *********************************************************************************************************************************/
|
---|
39 | /** Copy of interesting E820 entries. */
|
---|
40 | static INT15E820ENTRY g_aEntries[16];
|
---|
41 | /** Number of interesting entires. */
|
---|
42 | static unsigned g_cEntries = 0;
|
---|
43 | /** Number of intersting bytes found. */
|
---|
44 | static uint64_t g_cbInteresting = 0;
|
---|
45 | /** Lowest interesting address. */
|
---|
46 | static uint64_t g_uInterestingStart = UINT64_MAX;
|
---|
47 | /** End of interesting addresses. */
|
---|
48 | static uint64_t g_uInterestingEnd = 0;
|
---|
49 |
|
---|
50 |
|
---|
51 | /**
|
---|
52 | * For subsequence touch iterations that doesn't allocate any RAM.
|
---|
53 | *
|
---|
54 | * This may cause page pool activitiy if we've got more memory than we have room
|
---|
55 | * for in the pool. This depends on amount of guest RAM and how much could be
|
---|
56 | * backed by large pages.
|
---|
57 | */
|
---|
58 | static uint64_t CheckTouchedMemory(void)
|
---|
59 | {
|
---|
60 | unsigned iEntry;
|
---|
61 | uint64_t iPage = 0;
|
---|
62 | uint64_t cErrors = 0;
|
---|
63 | for (iEntry = 0; iEntry < g_cEntries; iEntry++)
|
---|
64 | {
|
---|
65 | uint64_t volatile *pu64Cur = (uint64_t *)g_aEntries[iEntry].uBaseAddr;
|
---|
66 | uint64_t cbLeft = g_aEntries[iEntry].cbRange;
|
---|
67 | while (cbLeft >= X86_PAGE_SIZE)
|
---|
68 | {
|
---|
69 | /* Check first. */
|
---|
70 | if (RT_LIKELY( pu64Cur[0] == iPage
|
---|
71 | && pu64Cur[1] == iPage))
|
---|
72 | { /* likely */ }
|
---|
73 | else
|
---|
74 | {
|
---|
75 | Bs3TestFailedF("%p: %#llx + %#llx, expected twice %#llx\n", pu64Cur, pu64Cur[0], pu64Cur[1], iPage);
|
---|
76 | cErrors++;
|
---|
77 | }
|
---|
78 |
|
---|
79 | /* Then write again. */
|
---|
80 | pu64Cur[0] = iPage;
|
---|
81 | pu64Cur[1] = iPage;
|
---|
82 |
|
---|
83 | /* Advance. */
|
---|
84 | iPage++;
|
---|
85 | pu64Cur += X86_PAGE_SIZE / sizeof(*pu64Cur);
|
---|
86 | cbLeft -= X86_PAGE_SIZE;
|
---|
87 | }
|
---|
88 | }
|
---|
89 | return cErrors;
|
---|
90 | }
|
---|
91 |
|
---|
92 |
|
---|
93 | /**
|
---|
94 | * First touching of memory, assuming content is ZERO.
|
---|
95 | */
|
---|
96 | static uint64_t FirstTouchMemory(void)
|
---|
97 | {
|
---|
98 | unsigned iEntry;
|
---|
99 | uint64_t iPage = 0;
|
---|
100 | for (iEntry = 0; iEntry < g_cEntries; iEntry++)
|
---|
101 | {
|
---|
102 | uint64_t volatile *pu64Cur = (uint64_t volatile *)g_aEntries[iEntry].uBaseAddr;
|
---|
103 | uint64_t cbLeft = g_aEntries[iEntry].cbRange;
|
---|
104 | while (cbLeft >= X86_PAGE_SIZE)
|
---|
105 | {
|
---|
106 | /*
|
---|
107 | * Write to the page first so we won't waste time mapping the zero
|
---|
108 | * page and get straight to the actual page allocation.
|
---|
109 | */
|
---|
110 | pu64Cur[0] = iPage;
|
---|
111 |
|
---|
112 | /* Then check that the 2nd qword is zero before writing it. */
|
---|
113 | if (RT_LIKELY(pu64Cur[1] == 0))
|
---|
114 | { /* likely */ }
|
---|
115 | else
|
---|
116 | Bs3TestFailedF("%p: %#llx, expected zero\n", pu64Cur, pu64Cur[1]);
|
---|
117 | pu64Cur[1] = iPage;
|
---|
118 |
|
---|
119 | /* Advance. */
|
---|
120 | iPage++;
|
---|
121 | pu64Cur += X86_PAGE_SIZE / sizeof(*pu64Cur);
|
---|
122 | cbLeft -= X86_PAGE_SIZE;
|
---|
123 | }
|
---|
124 | }
|
---|
125 | return iPage;
|
---|
126 | }
|
---|
127 |
|
---|
128 |
|
---|
129 | /**
|
---|
130 | * Translates a E820 entry type to a string.
|
---|
131 | */
|
---|
132 | static const char *getEntryTypeName(uint32_t uType)
|
---|
133 | {
|
---|
134 | switch (uType)
|
---|
135 | {
|
---|
136 | case INT15E820_TYPE_USABLE: return "USABLE";
|
---|
137 | case INT15E820_TYPE_RESERVED: return "RESERVED";
|
---|
138 | case INT15E820_TYPE_ACPI_RECLAIMABLE: return "ACPI_RECLAIMABLE";
|
---|
139 | case INT15E820_TYPE_ACPI_NVS: return "ACPI_NVS";
|
---|
140 | case INT15E820_TYPE_BAD: return "BAD";
|
---|
141 | default: return "unknown";
|
---|
142 | }
|
---|
143 | }
|
---|
144 |
|
---|
145 | BS3_DECL(void) Main_lm64()
|
---|
146 | {
|
---|
147 | uint32_t uCont;
|
---|
148 | unsigned i;
|
---|
149 |
|
---|
150 | Bs3TestInit("bs3-memalloc-1");
|
---|
151 |
|
---|
152 | /*
|
---|
153 | * Get the E820 memory descriptors and pick out those describing memory not
|
---|
154 | * already used by the Bs3Kit.
|
---|
155 | */
|
---|
156 | Bs3TestSub("INT15h/E820");
|
---|
157 | for (uCont = i = 0; i < 2048; i++)
|
---|
158 | {
|
---|
159 | uint32_t const uEbxCur = uCont;
|
---|
160 | INT15E820ENTRY Entry = { 0, 0, 0, 0 };
|
---|
161 | uint32_t cbEntry = sizeof(Entry);
|
---|
162 | if (!Bs3BiosInt15hE820_lm64(&Entry, &cbEntry, &uCont))
|
---|
163 | {
|
---|
164 | Bs3TestFailedF("int15h/E820 failed i=%u", i);
|
---|
165 | break;
|
---|
166 | }
|
---|
167 | Bs3TestPrintf("#%u/%#x: %#018llx LB %#018llx %s (%d)\n",
|
---|
168 | i, uEbxCur, Entry.uBaseAddr, Entry.cbRange, getEntryTypeName(Entry.uType), Entry.uType);
|
---|
169 | if (Entry.uType == INT15E820_TYPE_USABLE)
|
---|
170 | {
|
---|
171 | if (Entry.uBaseAddr >= _4G)
|
---|
172 | {
|
---|
173 | if (g_cEntries < RT_ELEMENTS(g_aEntries))
|
---|
174 | {
|
---|
175 | g_cbInteresting += Entry.cbRange;
|
---|
176 | if (g_uInterestingStart > Entry.uBaseAddr)
|
---|
177 | g_uInterestingStart = Entry.uBaseAddr;
|
---|
178 | if (g_uInterestingEnd < Entry.uBaseAddr + Entry.cbRange)
|
---|
179 | g_uInterestingEnd = Entry.uBaseAddr + Entry.cbRange;
|
---|
180 | Bs3MemCpy(&g_aEntries[g_cEntries++], &Entry, sizeof(Entry));
|
---|
181 | }
|
---|
182 | else
|
---|
183 | Bs3TestFailedF("Too many interesting E820 entries! Extend g_aEntries!\n");
|
---|
184 | }
|
---|
185 | }
|
---|
186 |
|
---|
187 | /* Done? */
|
---|
188 | if (uCont == 0)
|
---|
189 | break;
|
---|
190 | }
|
---|
191 | if (g_cEntries == 0)
|
---|
192 | Bs3TestFailedF("No interesting E820 entries! Make sure you've assigned more than 4GB to the VM!\n");
|
---|
193 | else
|
---|
194 | {
|
---|
195 | uint64_t uFailurePoint = 0;
|
---|
196 | int rc;
|
---|
197 | Bs3TestPrintf("Found %u interesting entries covering %#llx bytes (%u GB).\n"
|
---|
198 | "From %#llx to %#llx\n",
|
---|
199 | g_cEntries, g_cbInteresting, (unsigned)(g_cbInteresting / _1G), g_uInterestingStart, g_uInterestingEnd);
|
---|
200 |
|
---|
201 | if (g_uBs3EndOfRamAbove4G < g_uInterestingEnd)
|
---|
202 | Bs3TestFailedF("g_uBs3EndOfRamAbove4G (%#llx) is lower than g_uInterestingEnd (%#llx)!\n",
|
---|
203 | g_uBs3EndOfRamAbove4G, g_uInterestingEnd);
|
---|
204 |
|
---|
205 |
|
---|
206 | /*
|
---|
207 | * Map all the memory (Bs3Kit only maps memory below 4G).
|
---|
208 | */
|
---|
209 | Bs3TestSub("Mapping memory above 4GB");
|
---|
210 | if (!(g_uBs3CpuDetected & BS3CPU_F_PSE))
|
---|
211 | Bs3TestFailedF("PSE was not detected!\n");
|
---|
212 | else if (!(ASMGetCR4() & X86_CR4_PSE))
|
---|
213 | Bs3TestFailedF("PSE was not enabled!\n");
|
---|
214 | else if (RT_SUCCESS(rc = Bs3PagingMapRamAbove4GForLM(&uFailurePoint)))
|
---|
215 | {
|
---|
216 | #define PAGES_2_MB(a_cPages) ((a_cPages) / (_1M / X86_PAGE_SIZE))
|
---|
217 | uint64_t cTotalPages;
|
---|
218 | unsigned iLoop;
|
---|
219 |
|
---|
220 | /*
|
---|
221 | * Time touching all the memory.
|
---|
222 | */
|
---|
223 | Bs3TestSub("Allocation speed");
|
---|
224 | {
|
---|
225 | uint64_t const nsStart = Bs3TestNow();
|
---|
226 | uint64_t const uTscStart = ASMReadTSC();
|
---|
227 | uint64_t const cPages = FirstTouchMemory();
|
---|
228 | uint64_t const cTicksElapsed = ASMReadTSC() - uTscStart;
|
---|
229 | uint64_t const cNsElapsed = Bs3TestNow() - nsStart;
|
---|
230 | uint64_t uThruput;
|
---|
231 | Bs3TestValue("Pages", cPages, VMMDEV_TESTING_UNIT_PAGES);
|
---|
232 | Bs3TestValue("MiBs", PAGES_2_MB(cPages), VMMDEV_TESTING_UNIT_MEGABYTES);
|
---|
233 | Bs3TestValue("Alloc elapsed", cNsElapsed, VMMDEV_TESTING_UNIT_NS);
|
---|
234 | Bs3TestValue("Alloc elapsed in ticks", cTicksElapsed, VMMDEV_TESTING_UNIT_TICKS);
|
---|
235 | Bs3TestValue("Page alloc time", cNsElapsed / cPages, VMMDEV_TESTING_UNIT_NS_PER_PAGE);
|
---|
236 | Bs3TestValue("Page alloc time in ticks", cTicksElapsed / cPages, VMMDEV_TESTING_UNIT_TICKS_PER_PAGE);
|
---|
237 | uThruput = cPages * RT_NS_1SEC / cNsElapsed;
|
---|
238 | Bs3TestValue("Alloc thruput", uThruput, VMMDEV_TESTING_UNIT_PAGES_PER_SEC);
|
---|
239 | Bs3TestValue("Alloc thruput in MiBs", PAGES_2_MB(uThruput), VMMDEV_TESTING_UNIT_MEGABYTES_PER_SEC);
|
---|
240 | cTotalPages = cPages;
|
---|
241 | }
|
---|
242 |
|
---|
243 | /*
|
---|
244 | * Time accessing all the memory again. This might give a clue as to page pool performance.
|
---|
245 | */
|
---|
246 | for (iLoop = 0; iLoop < 2; iLoop++)
|
---|
247 | {
|
---|
248 | Bs3TestSub(iLoop == 0 ? "2nd access" : "3rd access");
|
---|
249 | {
|
---|
250 | uint64_t const nsStart = Bs3TestNow();
|
---|
251 | uint64_t const uTscStart = ASMReadTSC();
|
---|
252 | uint64_t const cErrors = CheckTouchedMemory();
|
---|
253 | uint64_t const cTicksElapsed = ASMReadTSC() - uTscStart;
|
---|
254 | uint64_t const cNsElapsed = Bs3TestNow() - nsStart;
|
---|
255 | uint64_t uThruput;
|
---|
256 | Bs3TestValue("Access elapsed", cNsElapsed, VMMDEV_TESTING_UNIT_NS);
|
---|
257 | Bs3TestValue("Access elapsed in ticks", cTicksElapsed, VMMDEV_TESTING_UNIT_TICKS);
|
---|
258 | Bs3TestValue("Page access time", cNsElapsed / cTotalPages, VMMDEV_TESTING_UNIT_NS_PER_PAGE);
|
---|
259 | Bs3TestValue("Page access time in ticks", cTicksElapsed / cTotalPages, VMMDEV_TESTING_UNIT_TICKS_PER_PAGE);
|
---|
260 | uThruput = cTotalPages * RT_NS_1SEC / cNsElapsed;
|
---|
261 | Bs3TestValue("Access thruput", uThruput, VMMDEV_TESTING_UNIT_PAGES_PER_SEC);
|
---|
262 | Bs3TestValue("Access thruput in MiBs", PAGES_2_MB(uThruput), VMMDEV_TESTING_UNIT_MEGABYTES_PER_SEC);
|
---|
263 | }
|
---|
264 | }
|
---|
265 | }
|
---|
266 | else
|
---|
267 | Bs3TestFailedF("Bs3PagingMapRamAbove4GForLM failed at %#llx: %d", uFailurePoint, rc);
|
---|
268 | }
|
---|
269 |
|
---|
270 | Bs3TestTerm();
|
---|
271 | }
|
---|
272 |
|
---|