VirtualBox

source: vbox/trunk/src/VBox/Debugger/DBGPlugInLinux.cpp@ 54212

Last change on this file since 54212 was 54212, checked in by vboxsync, 10 years ago

DBGPlugInLinux.cpp: A quick shot at locating and decoding current kallsyms tables.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.2 KB
Line 
1/* $Id: DBGPlugInLinux.cpp 54212 2015-02-15 23:34:11Z vboxsync $ */
2/** @file
3 * DBGPlugInLinux - Debugger and Guest OS Digger Plugin For Linux.
4 */
5
6/*
7 * Copyright (C) 2008-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_DBGF ///@todo add new log group.
23#include "DBGPlugIns.h"
24#include "DBGPlugInCommonELF.h"
25#include <VBox/vmm/dbgf.h>
26#include <iprt/string.h>
27#include <iprt/mem.h>
28#include <iprt/stream.h>
29#include <iprt/ctype.h>
30
31
32/*******************************************************************************
33* Structures and Typedefs *
34*******************************************************************************/
35
36/** @name InternalLinux structures
37 * @{ */
38
39
40/** @} */
41
42
43/**
44 * Linux guest OS digger instance data.
45 */
46typedef struct DBGDIGGERLINUX
47{
48 /** Whether the information is valid or not.
49 * (For fending off illegal interface method calls.) */
50 bool fValid;
51 /** Set if 64-bit, clear if 32-bit. */
52 bool f64Bit;
53
54 /** The address of the linux banner.
55 * This is set during probing. */
56 DBGFADDRESS AddrLinuxBanner;
57 /** Kernel base address.
58 * This is set during probing, refined during kallsyms parsing. */
59 DBGFADDRESS AddrKernelBase;
60 /** The kernel size. */
61 uint32_t cbKernel;
62
63 /** The number of kernel symbols (kallsyms_num_syms).
64 * This is set during init. */
65 uint32_t cKernelSymbols;
66 /** The size of the kernel name table (sizeof(kallsyms_names)). */
67 uint32_t cbKernelNames;
68 /** Number of entries in the kernel_markers table. */
69 uint32_t cKernelNameMarkers;
70 /** The size of the kernel symbol token table. */
71 uint32_t cbKernelTokenTable;
72 /** The address of the encoded kernel symbol names (kallsyms_names). */
73 DBGFADDRESS AddrKernelNames;
74 /** The address of the kernel symbol addresses (kallsyms_addresses). */
75 DBGFADDRESS AddrKernelAddresses;
76 /** The address of the kernel symbol name markers (kallsyms_markers). */
77 DBGFADDRESS AddrKernelNameMarkers;
78 /** The address of the kernel symbol token table (kallsyms_token_table). */
79 DBGFADDRESS AddrKernelTokenTable;
80 /** The address of the kernel symbol token index table (kallsyms_token_index). */
81 DBGFADDRESS AddrKernelTokenIndex;
82
83} DBGDIGGERLINUX;
84/** Pointer to the linux guest OS digger instance data. */
85typedef DBGDIGGERLINUX *PDBGDIGGERLINUX;
86
87
88/*******************************************************************************
89* Defined Constants And Macros *
90*******************************************************************************/
/** Checks that @a Addr lies in the range this digger accepts for a 32-bit
 *  linux kernel address (exclusive bounds). */
#define LNX32_VALID_ADDRESS(Addr)           ((Addr) > UINT32_C(0x80000000) && (Addr) < UINT32_C(0xfffff000))
/** Checks that @a Addr lies in the range this digger accepts for a 64-bit
 *  linux kernel address (exclusive bounds). */
#define LNX64_VALID_ADDRESS(Addr)           ((Addr) > UINT64_C(0xffff800000000000) && (Addr) < UINT64_C(0xfffffffffffff000))

/** Upper bound on the kernel size; also limits how far we scan. */
#define LNX_MAX_KERNEL_SIZE                 UINT32_C(0x0f000000)

/** Largest kallsyms_names table we expect (bytes). */
#define LNX_MAX_KALLSYMS_NAMES_SIZE         UINT32_C(0x200000)
/** Largest kallsyms_token_table we expect (bytes). */
#define LNX_MAX_KALLSYMS_TOKEN_TABLE_SIZE   UINT32_C(0x10000)
/** Smallest kallsyms_num_syms value we accept (2048). */
#define LNX_MIN_KALLSYMS_SYMBOLS            UINT32_C(0x800)
/** Largest kallsyms_num_syms value we accept (1048576). */
#define LNX_MAX_KALLSYMS_SYMBOLS            UINT32_C(0x100000)
/** Shortest encoded symbol length we expect in kallsyms_names. */
#define LNX_MIN_KALLSYMS_ENC_LENGTH         UINT8_C(1)
/** Longest encoded symbol length we expect in kallsyms_names.
 * @todo check real life here. */
#define LNX_MAX_KALLSYMS_ENC_LENGTH         UINT8_C(28)
/** Approximate upper bound on the length of a single string token. */
#define LNX_MAX_KALLSYMS_TOKEN_LEN          UINT16_C(32)


/** Tag value this digger assigns to the debug modules it creates
 *  (original note: 'linuxmod' on little endian ASCII systems). */
#define DIG_LNX_MOD_TAG                     UINT64_C(0x545f5d78758e898c)
118
119
120/*******************************************************************************
121* Internal Functions *
122*******************************************************************************/
123static DECLCALLBACK(int) dbgDiggerLinuxInit(PUVM pUVM, void *pvData);
124
125
126/*******************************************************************************
127* Global Variables *
128*******************************************************************************/
/** Table of common linux kernel load addresses that the probe callback
 *  scans from.  Read-only lookup data, hence const. */
static const uint64_t g_au64LnxKernelAddresses[] =
{
    UINT64_C(0xc0100000),
    UINT64_C(0x90100000),
    UINT64_C(0xffffffff80200000)
};
136
137
138/**
139 * @copydoc DBGFOSREG::pfnQueryInterface
140 */
141static DECLCALLBACK(void *) dbgDiggerLinuxQueryInterface(PUVM pUVM, void *pvData, DBGFOSINTERFACE enmIf)
142{
143 return NULL;
144}
145
146
147/**
148 * @copydoc DBGFOSREG::pfnQueryVersion
149 */
150static DECLCALLBACK(int) dbgDiggerLinuxQueryVersion(PUVM pUVM, void *pvData, char *pszVersion, size_t cchVersion)
151{
152 PDBGDIGGERLINUX pThis = (PDBGDIGGERLINUX)pvData;
153 Assert(pThis->fValid);
154
155 /*
156 * It's all in the linux banner.
157 */
158 int rc = DBGFR3MemReadString(pUVM, 0, &pThis->AddrLinuxBanner, pszVersion, cchVersion);
159 if (RT_SUCCESS(rc))
160 {
161 char *pszEnd = RTStrEnd(pszVersion, cchVersion);
162 AssertReturn(pszEnd, VERR_BUFFER_OVERFLOW);
163 while ( pszEnd > pszVersion
164 && RT_C_IS_SPACE(pszEnd[-1]))
165 pszEnd--;
166 *pszEnd = '\0';
167 }
168 else
169 RTStrPrintf(pszVersion, cchVersion, "DBGFR3MemRead -> %Rrc", rc);
170
171 return rc;
172}
173
174
175/**
176 * @copydoc DBGFOSREG::pfnTerm
177 */
178static DECLCALLBACK(void) dbgDiggerLinuxTerm(PUVM pUVM, void *pvData)
179{
180 PDBGDIGGERLINUX pThis = (PDBGDIGGERLINUX)pvData;
181 Assert(pThis->fValid);
182
183 pThis->fValid = false;
184}
185
186
187/**
188 * @copydoc DBGFOSREG::pfnRefresh
189 */
190static DECLCALLBACK(int) dbgDiggerLinuxRefresh(PUVM pUVM, void *pvData)
191{
192 PDBGDIGGERLINUX pThis = (PDBGDIGGERLINUX)pvData;
193 NOREF(pThis);
194 Assert(pThis->fValid);
195
196 /*
197 * For now we'll flush and reload everything.
198 */
199 dbgDiggerLinuxTerm(pUVM, pvData);
200 return dbgDiggerLinuxInit(pUVM, pvData);
201}
202
203
204/**
205 * Worker for dbgDiggerLinuxFindStartOfNamesAndSymbolCount that update the
206 * digger data.
207 *
208 * @returns VINF_SUCCESS.
209 * @param pThis The Linux digger data to update.
210 * @param pAddrKernelNames The kallsyms_names address.
211 * @param cKernelSymbols The number of kernel symbol.
212 * @param cbAddress The guest address size.
213 */
214static int dbgDiggerLinuxFoundStartOfNames(PDBGDIGGERLINUX pThis, PCDBGFADDRESS pAddrKernelNames,
215 uint32_t cKernelSymbols, uint32_t cbAddress)
216{
217 pThis->cKernelSymbols = cKernelSymbols;
218 pThis->AddrKernelNames = *pAddrKernelNames;
219 pThis->AddrKernelAddresses = *pAddrKernelNames;
220 DBGFR3AddrSub(&pThis->AddrKernelAddresses, (cKernelSymbols + 1) * cbAddress);
221
222 Log(("dbgDiggerLinuxFoundStartOfNames: AddrKernelAddresses=%RGv\n"
223 "dbgDiggerLinuxFoundStartOfNames: cKernelSymbols=%#x (at %RGv)\n"
224 "dbgDiggerLinuxFoundStartOfNames: AddrKernelName=%RGv\n",
225 pThis->AddrKernelAddresses.FlatPtr,
226 pThis->cKernelSymbols, pThis->AddrKernelNames.FlatPtr - cbAddress,
227 pThis->AddrKernelNames.FlatPtr));
228 return VINF_SUCCESS;
229}
230
231
232/**
233 * Tries to find the address of the kallsyms_names, kallsyms_num_syms and
234 * kallsyms_addresses symbols.
235 *
236 * The kallsyms_num_syms is read and stored in pThis->cKernelSymbols, while the
237 * addresses of the other two are stored as pThis->AddrKernelNames and
238 * pThis->AddrKernelAddresses.
239 *
240 * @returns VBox status code, success indicating that all three variables have
241 * been found and taken down.
242 * @param pUVM The user mode VM handle.
243 * @param pThis The Linux digger data.
244 * @param pHitAddr An address we think is inside kallsyms_names.
245 */
246static int dbgDiggerLinuxFindStartOfNamesAndSymbolCount(PUVM pUVM, PDBGDIGGERLINUX pThis, PCDBGFADDRESS pHitAddr)
247{
248 /*
249 * Search backwards in chunks.
250 */
251 union
252 {
253 uint8_t ab[0x1000];
254 uint32_t au32[0x1000 / sizeof(uint32_t)];
255 uint64_t au64[0x1000 / sizeof(uint64_t)];
256 } uBuf;
257 uint32_t cbLeft = LNX_MAX_KALLSYMS_NAMES_SIZE;
258 uint32_t cbBuf = pHitAddr->FlatPtr & (sizeof(uBuf) - 1);
259 DBGFADDRESS CurAddr = *pHitAddr;
260 DBGFR3AddrSub(&CurAddr, cbBuf);
261 cbBuf += sizeof(uint64_t) - 1; /* In case our kobj hit is in the first 4/8 bytes. */
262 for (;;)
263 {
264 int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &CurAddr, &uBuf, sizeof(uBuf));
265 if (RT_FAILURE(rc))
266 return rc;
267
268 /*
269 * We assume that the three symbols are aligned on guest pointer boundrary.
270 *
271 * The boundrary between the two tables should be noticable as the number
272 * is unlikely to be more than 16 millions, there will be at least one zero
273 * byte where it is, 64-bit will have 5 zero bytes. Zero bytes aren't all
274 * that common in the kallsyms_names table.
275 *
276 * Also the kallsyms_names table starts with a length byte, which means
277 * we're likely to see a byte in the range 1..31.
278 *
279 * The kallsyms_addresses are mostly sorted (except for the start where the
280 * absolute symbols are), so we'll spot a bunch of kernel addresses
281 * immediately preceeding the kallsyms_num_syms field.
282 *
283 * Lazy bird: If kallsyms_num_syms is on a buffer boundrary, we skip
284 * the check for kernel addresses preceeding it.
285 */
286 if (pThis->f64Bit)
287 {
288 uint32_t i = cbBuf / sizeof(uint64_t);
289 while (i-- > 0)
290 if ( uBuf.au64[i] <= LNX_MAX_KALLSYMS_SYMBOLS
291 && uBuf.au64[i] >= LNX_MIN_KALLSYMS_SYMBOLS)
292 {
293 uint8_t *pb = (uint8_t *)&uBuf.au64[i + 1];
294 if ( pb[0] <= LNX_MAX_KALLSYMS_ENC_LENGTH
295 && pb[0] >= LNX_MIN_KALLSYMS_ENC_LENGTH)
296 {
297 if ( (i <= 0 || LNX64_VALID_ADDRESS(uBuf.au64[i - 1]))
298 && (i <= 1 || LNX64_VALID_ADDRESS(uBuf.au64[i - 2]))
299 && (i <= 2 || LNX64_VALID_ADDRESS(uBuf.au64[i - 3])))
300 return dbgDiggerLinuxFoundStartOfNames(pThis,
301 DBGFR3AddrAdd(&CurAddr, (i + 1) * sizeof(uint64_t)),
302 (uint32_t)uBuf.au64[i], sizeof(uint64_t));
303 }
304 }
305 }
306 else
307 {
308 uint32_t i = cbBuf / sizeof(uint32_t);
309 while (i-- > 0)
310 if ( uBuf.au32[i] <= LNX_MAX_KALLSYMS_SYMBOLS
311 && uBuf.au32[i] >= LNX_MIN_KALLSYMS_SYMBOLS)
312 {
313 uint8_t *pb = (uint8_t *)&uBuf.au32[i + 1];
314 if ( pb[0] <= LNX_MAX_KALLSYMS_ENC_LENGTH
315 && pb[0] >= LNX_MIN_KALLSYMS_ENC_LENGTH)
316 {
317 if ( (i <= 0 || LNX32_VALID_ADDRESS(uBuf.au32[i - 1]))
318 && (i <= 1 || LNX32_VALID_ADDRESS(uBuf.au32[i - 2]))
319 && (i <= 2 || LNX32_VALID_ADDRESS(uBuf.au32[i - 3])))
320 return dbgDiggerLinuxFoundStartOfNames(pThis,
321 DBGFR3AddrAdd(&CurAddr, (i + 1) * sizeof(uint32_t)),
322 uBuf.au32[i], sizeof(uint32_t));
323 }
324 }
325 }
326
327 /*
328 * Advance
329 */
330 if (RT_UNLIKELY(cbLeft <= sizeof(uBuf)))
331 {
332 Log(("dbgDiggerLinuxFindStartOfNamesAndSymbolCount: failed (pHitAddr=%RGv)\n", pHitAddr->FlatPtr));
333 return VERR_NOT_FOUND;
334 }
335 cbLeft -= sizeof(uBuf);
336 DBGFR3AddrSub(&CurAddr, sizeof(uBuf));
337 cbBuf = sizeof(uBuf);
338 }
339}
340
341
342/**
343 * Worker for dbgDiggerLinuxFindEndNames that records the findings.
344 *
345 * @returns VINF_SUCCESS
346 * @param pThis The linux digger data to update.
347 * @param pAddrMarkers The address of the marker (kallsyms_markers).
348 * @param cbMarkerEntry The size of a marker entry (32-bit or 64-bit).
349 */
350static int dbgDiggerLinuxFoundMarkers(PDBGDIGGERLINUX pThis, PCDBGFADDRESS pAddrMarkers, uint32_t cbMarkerEntry)
351{
352 pThis->cbKernelNames = pAddrMarkers->FlatPtr - pThis->AddrKernelNames.FlatPtr - 1;
353 pThis->AddrKernelNameMarkers = *pAddrMarkers;
354 pThis->cKernelNameMarkers = RT_ALIGN_32(pThis->cKernelSymbols, 256) / 256;
355 pThis->AddrKernelTokenTable = *pAddrMarkers;
356 DBGFR3AddrAdd(&pThis->AddrKernelTokenTable, pThis->cKernelNameMarkers * cbMarkerEntry);
357
358 Log(("dbgDiggerLinuxFoundMarkers: AddrKernelNames=%RGv cbKernelNames=%#x\n"
359 "dbgDiggerLinuxFoundMarkers: AddrKernelNameMarkers=%RGv cKernelNameMarkers=%#x\n"
360 "dbgDiggerLinuxFoundMarkers: AddrKernelTokenTable=%RGv\n",
361 pThis->AddrKernelNames.FlatPtr, pThis->cbKernelNames,
362 pThis->AddrKernelNameMarkers.FlatPtr, pThis->cKernelNameMarkers,
363 pThis->AddrKernelTokenTable.FlatPtr));
364 return VINF_SUCCESS;
365}
366
367
368/**
369 * Tries to find the end of kallsyms_names and thereby the start of
370 * kallsyms_markers and kallsyms_token_table.
371 *
372 * The kallsyms_names size is stored in pThis->cbKernelNames, the addresses of
373 * the two other symbols in pThis->AddrKernelNameMarkers and
374 * pThis->AddrKernelTokenTable. The number of marker entries is stored in
375 * pThis->cKernelNameMarkers.
376 *
377 * @returns VBox status code, success indicating that all three variables have
378 * been found and taken down.
379 * @param pUVM The user mode VM handle.
380 * @param pThis The Linux digger data.
381 * @param pHitAddr An address we think is inside kallsyms_names.
382 */
383static int dbgDiggerLinuxFindEndOfNamesAndMore(PUVM pUVM, PDBGDIGGERLINUX pThis, PCDBGFADDRESS pHitAddr)
384{
385 /*
386 * Search forward in chunks.
387 */
388 union
389 {
390 uint8_t ab[0x1000];
391 uint32_t au32[0x1000 / sizeof(uint32_t)];
392 uint64_t au64[0x1000 / sizeof(uint64_t)];
393 } uBuf;
394 bool fPendingZeroHit = false;
395 uint32_t cbLeft = LNX_MAX_KALLSYMS_NAMES_SIZE + sizeof(uBuf);
396 uint32_t offBuf = pHitAddr->FlatPtr & (sizeof(uBuf) - 1);
397 DBGFADDRESS CurAddr = *pHitAddr;
398 DBGFR3AddrSub(&CurAddr, offBuf);
399 for (;;)
400 {
401 int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &CurAddr, &uBuf, sizeof(uBuf));
402 if (RT_FAILURE(rc))
403 return rc;
404
405 /*
406 * The kallsyms_names table is followed by kallsyms_markers we assume,
407 * using sizeof(unsigned long) alignment like the preceeding symbols.
408 *
409 * The kallsyms_markers table has entried sizeof(unsigned long) and
410 * contains offsets into kallsyms_names. The kallsyms_markers used to
411 * index kallsyms_names and reduce seek time when looking up the name
412 * of an address/symbol. Each entry in kallsyms_markers covers 256
413 * symbol names.
414 *
415 * Because of this, the first entry is always zero and all the entries
416 * are ascending. It also follows that the size of the table can be
417 * calculated from kallsyms_num_syms.
418 *
419 * Note! We could also have walked kallsyms_names by skipping
420 * kallsyms_num_syms names, but this is faster and we will
421 * validate the encoded names later.
422 */
423 if (pThis->f64Bit)
424 {
425 if ( RT_UNLIKELY(fPendingZeroHit)
426 && uBuf.au64[0] >= (LNX_MIN_KALLSYMS_ENC_LENGTH + 1) * 256
427 && uBuf.au64[0] <= (LNX_MAX_KALLSYMS_ENC_LENGTH + 1) * 256)
428 return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrSub(&CurAddr, sizeof(uint64_t)), sizeof(uint64_t));
429
430 uint32_t const cEntries = sizeof(uBuf) / sizeof(uint64_t);
431 for (uint32_t i = offBuf / sizeof(uint64_t); i < cEntries; i++)
432 if (uBuf.au64[i] == 0)
433 {
434 if (RT_UNLIKELY(i + 1 >= cEntries))
435 {
436 fPendingZeroHit = true;
437 break;
438 }
439 if ( uBuf.au64[i + 1] >= (LNX_MIN_KALLSYMS_ENC_LENGTH + 1) * 256
440 && uBuf.au64[i + 1] <= (LNX_MAX_KALLSYMS_ENC_LENGTH + 1) * 256)
441 return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrAdd(&CurAddr, i * sizeof(uint64_t)), sizeof(uint64_t));
442 }
443 }
444 else
445 {
446 if ( RT_UNLIKELY(fPendingZeroHit)
447 && uBuf.au32[0] >= (LNX_MIN_KALLSYMS_ENC_LENGTH + 1) * 256
448 && uBuf.au32[0] <= (LNX_MAX_KALLSYMS_ENC_LENGTH + 1) * 256)
449 return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrSub(&CurAddr, sizeof(uint32_t)), sizeof(uint32_t));
450
451 uint32_t const cEntries = sizeof(uBuf) / sizeof(uint32_t);
452 for (uint32_t i = offBuf / sizeof(uint32_t); i < cEntries; i++)
453 if (uBuf.au32[i] == 0)
454 {
455 if (RT_UNLIKELY(i + 1 >= cEntries))
456 {
457 fPendingZeroHit = true;
458 break;
459 }
460 if ( uBuf.au32[i + 1] >= (LNX_MIN_KALLSYMS_ENC_LENGTH + 1) * 256
461 && uBuf.au32[i + 1] <= (LNX_MAX_KALLSYMS_ENC_LENGTH + 1) * 256)
462 return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrAdd(&CurAddr, i * sizeof(uint32_t)), sizeof(uint32_t));
463 }
464 }
465
466 /*
467 * Advance
468 */
469 if (RT_UNLIKELY(cbLeft <= sizeof(uBuf)))
470 {
471 Log(("dbgDiggerLinuxFindEndOfNamesAndMore: failed (pHitAddr=%RGv)\n", pHitAddr->FlatPtr));
472 return VERR_NOT_FOUND;
473 }
474 cbLeft -= sizeof(uBuf);
475 DBGFR3AddrAdd(&CurAddr, sizeof(uBuf));
476 offBuf = 0;
477 }
478}
479
480
481/**
482 * Locates the kallsyms_token_index table.
483 *
484 * Storing the address in pThis->AddrKernelTokenIndex and the size of the token
485 * table in pThis->cbKernelTokenTable.
486 *
487 * @returns VBox status code.
488 * @param pUVM The user mode VM handle.
489 * @param pThis The Linux digger data.
490 */
491static int dbgDiggerLinuxFindTokenIndex(PUVM pUVM, PDBGDIGGERLINUX pThis)
492{
493 /*
494 * The kallsyms_token_table is very much like a string table. Due to the
495 * nature of the compression algorithm it is reasonably short (one example
496 * here is 853 bytes), so we'll not be reading it in chunks but in full.
497 * To be on the safe side, we read 8KB, ASSUMING we won't run into unmapped
498 * memory or any other nasty stuff...
499 */
500 union
501 {
502 uint8_t ab[0x2000];
503 uint16_t au16[0x2000 / sizeof(uint16_t)];
504 } uBuf;
505 DBGFADDRESS CurAddr = pThis->AddrKernelTokenTable;
506 int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &CurAddr, &uBuf, sizeof(uBuf));
507 if (RT_FAILURE(rc))
508 return rc;
509
510 /*
511 * We've got two choices here, either walk the string table or look for
512 * the next structure, kallsyms_token_index.
513 *
514 * The token index is a table of 256 uint16_t entries (index by bytes
515 * from kallsyms_names) that gives offsets in kallsyms_token_table. It
516 * starts with a zero entry and the following entries are sorted in
517 * ascending order. The range of the entries are reasonably small since
518 * kallsyms_token_table is small.
519 *
520 * The alignment seems to be sizeof(unsigned long), just like
521 * kallsyms_token_table.
522 *
523 * So, we start by looking for a zero 16-bit entry.
524 */
525 uint32_t cIncr = (pThis->f64Bit ? sizeof(uint64_t) : sizeof(uint32_t)) / sizeof(uint16_t);
526
527 for (uint32_t i = 0; i < sizeof(uBuf) / sizeof(uint16_t) - 16; i += cIncr)
528 if ( uBuf.au16[i] == 0
529 && uBuf.au16[i + 1] > 0
530 && uBuf.au16[i + 1] <= LNX_MAX_KALLSYMS_TOKEN_LEN
531 && (uint16_t)(uBuf.au16[i + 2] - uBuf.au16[i + 1] - 1U) <= (uint16_t)LNX_MAX_KALLSYMS_TOKEN_LEN
532 && (uint16_t)(uBuf.au16[i + 3] - uBuf.au16[i + 2] - 1U) <= (uint16_t)LNX_MAX_KALLSYMS_TOKEN_LEN
533 && (uint16_t)(uBuf.au16[i + 4] - uBuf.au16[i + 3] - 1U) <= (uint16_t)LNX_MAX_KALLSYMS_TOKEN_LEN
534 && (uint16_t)(uBuf.au16[i + 5] - uBuf.au16[i + 4] - 1U) <= (uint16_t)LNX_MAX_KALLSYMS_TOKEN_LEN
535 && (uint16_t)(uBuf.au16[i + 6] - uBuf.au16[i + 5] - 1U) <= (uint16_t)LNX_MAX_KALLSYMS_TOKEN_LEN
536 )
537 {
538 pThis->AddrKernelTokenIndex = CurAddr;
539 DBGFR3AddrAdd(&pThis->AddrKernelTokenIndex, i * sizeof(uint16_t));
540 pThis->cbKernelTokenTable = i * sizeof(uint16_t);
541 return VINF_SUCCESS;
542 }
543
544 Log(("dbgDiggerLinuxFindTokenIndex: Failed (%RGv..%RGv)\n", CurAddr.FlatPtr, CurAddr.FlatPtr + (RTGCUINTPTR)sizeof(uBuf)));
545 return VERR_NOT_FOUND;
546}
547
548
549/**
550 * Loads the kernel symbols from the kallsyms tables.
551 *
552 * @returns VBox status code.
553 * @param pUVM The user mode VM handle.
554 * @param pThis The Linux digger data.
555 */
556static int dbgDiggerLinuxLoadKernelSymbols(PUVM pUVM, PDBGDIGGERLINUX pThis)
557{
558 /*
559 * Allocate memory for temporary table copies, reading the tables as we go.
560 */
561 uint32_t const cbGuestAddr = pThis->f64Bit ? sizeof(uint64_t) : sizeof(uint32_t);
562 void *pvAddresses = RTMemAllocZ(pThis->cKernelSymbols * cbGuestAddr);
563 int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelAddresses, pvAddresses, pThis->cKernelSymbols * cbGuestAddr);
564 if (RT_SUCCESS(rc))
565 {
566 uint8_t *pbNames = (uint8_t *)RTMemAllocZ(pThis->cbKernelNames);
567 rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelNames, pbNames, pThis->cbKernelNames);
568 if (RT_SUCCESS(rc))
569 {
570 char *pszzTokens = (char *)RTMemAllocZ(pThis->cbKernelTokenTable);
571 rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelTokenTable, pszzTokens, pThis->cbKernelTokenTable);
572 if (RT_SUCCESS(rc))
573 {
574 uint16_t *paoffTokens = (uint16_t *)RTMemAllocZ(256 * sizeof(uint16_t));
575 rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelTokenIndex, paoffTokens, 256 * sizeof(uint16_t));
576 if (RT_SUCCESS(rc))
577 {
578 /*
579 * Figure out the kernel start and end.
580 */
581 RTGCUINTPTR uKernelStart = pThis->AddrKernelAddresses.FlatPtr;
582 RTGCUINTPTR uKernelEnd = pThis->AddrKernelTokenIndex.FlatPtr + 256 * sizeof(uint16_t);
583 uint32_t i;
584 if (cbGuestAddr == sizeof(uint64_t))
585 {
586 uint64_t *pauAddrs = (uint64_t *)pvAddresses;
587 for (i = 0; i < pThis->cKernelSymbols; i++)
588 if ( pauAddrs[i] < uKernelStart
589 && LNX64_VALID_ADDRESS(pauAddrs[i])
590 && uKernelStart - pauAddrs[i] < LNX_MAX_KERNEL_SIZE)
591 uKernelStart = pauAddrs[i];
592
593 for (i = pThis->cKernelSymbols - 1; i > 0; i--)
594 if ( pauAddrs[i] > uKernelEnd
595 && LNX64_VALID_ADDRESS(pauAddrs[i])
596 && pauAddrs[i] - uKernelEnd < LNX_MAX_KERNEL_SIZE)
597 uKernelEnd = pauAddrs[i];
598 }
599 else
600 {
601 uint32_t *pauAddrs = (uint32_t *)pvAddresses;
602 for (i = 0; i < pThis->cKernelSymbols; i++)
603 if ( pauAddrs[i] < uKernelStart
604 && LNX32_VALID_ADDRESS(pauAddrs[i])
605 && uKernelStart - pauAddrs[i] < LNX_MAX_KERNEL_SIZE)
606 uKernelStart = pauAddrs[i];
607
608 for (i = pThis->cKernelSymbols - 1; i > 0; i--)
609 if ( pauAddrs[i] > uKernelEnd
610 && LNX32_VALID_ADDRESS(pauAddrs[i])
611 && pauAddrs[i] - uKernelEnd < LNX_MAX_KERNEL_SIZE)
612 uKernelEnd = pauAddrs[i];
613 }
614
615 RTGCUINTPTR cbKernel = uKernelEnd - uKernelStart;
616 pThis->cbKernel = (uint32_t)cbKernel;
617 DBGFR3AddrFromFlat(pUVM, &pThis->AddrKernelBase, uKernelStart);
618 Log(("dbgDiggerLinuxLoadKernelSymbols: uKernelStart=%RGv cbKernel=%#x\n", uKernelStart, cbKernel));
619
620 /*
621 * Create a module for the kernel.
622 */
623 RTDBGMOD hMod;
624 rc = RTDbgModCreate(&hMod, "vmlinux", cbKernel, 0 /*fFlags*/);
625 if (RT_SUCCESS(rc))
626 {
627 rc = RTDbgModSetTag(hMod, DIG_LNX_MOD_TAG); AssertRC(rc);
628 rc = VINF_SUCCESS;
629
630 /*
631 * Enumerate the symbols.
632 */
633 uint8_t const *pbCurAddr = (uint8_t const *)pvAddresses;
634 uint32_t offName = 0;
635 uint32_t cLeft = pThis->cKernelSymbols;
636 while (cLeft-- > 0 && RT_SUCCESS(rc))
637 {
638 /* Decode the symbol name first. */
639 if (RT_LIKELY(offName < pThis->cbKernelNames))
640 {
641 uint8_t cbName = pbNames[offName++];
642 if (RT_LIKELY(offName + cbName <= pThis->cbKernelNames))
643 {
644 char szSymbol[4096];
645 uint32_t offSymbol = 0;
646 while (cbName-- > 0)
647 {
648 uint8_t bEnc = pbNames[offName++];
649 uint16_t offToken = paoffTokens[bEnc];
650 if (RT_LIKELY(offToken < pThis->cbKernelTokenTable))
651 {
652 const char *pszToken = &pszzTokens[offToken];
653 char ch;
654 while ((ch = *pszToken++) != '\0')
655 if (offSymbol < sizeof(szSymbol) - 1)
656 szSymbol[offSymbol++] = ch;
657 }
658 else
659 {
660 rc = VERR_INVALID_UTF8_ENCODING;
661 break;
662 }
663 }
664 szSymbol[offSymbol < sizeof(szSymbol) ? offSymbol : sizeof(szSymbol) - 1] = '\0';
665
666 /* The address. */
667 RTGCUINTPTR uSymAddr = cbGuestAddr == sizeof(uint64_t)
668 ? *(uint64_t *)pbCurAddr : *(uint32_t *)pbCurAddr;
669 pbCurAddr += cbGuestAddr;
670
671 /* Add it without the type char. */
672 if (uSymAddr - uKernelStart <= cbKernel)
673 {
674 rc = RTDbgModSymbolAdd(hMod, &szSymbol[1], RTDBGSEGIDX_RVA, uSymAddr - uKernelStart,
675 0 /*cb*/, 0 /*fFlags*/, NULL);
676 if (RT_FAILURE(rc))
677 {
678 if ( rc == VERR_DBG_SYMBOL_NAME_OUT_OF_RANGE
679 || rc == VERR_DBG_INVALID_RVA
680 || rc == VERR_DBG_ADDRESS_CONFLICT
681 || rc == VERR_DBG_DUPLICATE_SYMBOL)
682 {
683 Log2(("dbgDiggerLinuxLoadKernelSymbols: RTDbgModSymbolAdd(,%s,) failed %Rrc (ignored)\n", szSymbol, rc));
684 rc = VINF_SUCCESS;
685 }
686 else
687 Log(("dbgDiggerLinuxLoadKernelSymbols: RTDbgModSymbolAdd(,%s,) failed %Rrc\n", szSymbol, rc));
688 }
689 }
690 }
691 else
692 {
693 rc = VERR_END_OF_STRING;
694 Log(("dbgDiggerLinuxLoadKernelSymbols: offName=%#x cLeft=%#x cbName=%#x cbKernelNames=%#x\n",
695 offName, cLeft, cbName, pThis->cbKernelNames));
696 }
697 }
698 else
699 {
700 rc = VERR_END_OF_STRING;
701 Log(("dbgDiggerLinuxLoadKernelSymbols: offName=%#x cLeft=%#x cbKernelNames=%#x\n",
702 offName, cLeft, pThis->cbKernelNames));
703 }
704 }
705
706 /*
707 * Link the module into the address space.
708 */
709 if (RT_SUCCESS(rc))
710 {
711 RTDBGAS hAs = DBGFR3AsResolveAndRetain(pUVM, DBGF_AS_KERNEL);
712 if (hAs != NIL_RTDBGAS)
713 rc = RTDbgAsModuleLink(hAs, hMod, uKernelStart, RTDBGASLINK_FLAGS_REPLACE);
714 else
715 rc = VERR_INTERNAL_ERROR;
716 RTDbgAsRelease(hAs);
717 }
718 else
719 Log(("dbgDiggerLinuxFindTokenIndex: Failed: %Rrc\n", rc));
720 RTDbgModRelease(hMod);
721 }
722 else
723 Log(("dbgDiggerLinuxFindTokenIndex: RTDbgModCreate failed: %Rrc\n", rc));
724 }
725 else
726 Log(("dbgDiggerLinuxFindTokenIndex: Reading token index at %RGv failed: %Rrc\n",
727 pThis->AddrKernelTokenIndex.FlatPtr, rc));
728 RTMemFree(paoffTokens);
729 }
730 else
731 Log(("dbgDiggerLinuxFindTokenIndex: Reading token table at %RGv failed: %Rrc\n",
732 pThis->AddrKernelTokenTable.FlatPtr, rc));
733 RTMemFree(pszzTokens);
734 }
735 else
736 Log(("dbgDiggerLinuxFindTokenIndex: Reading encoded names at %RGv failed: %Rrc\n",
737 pThis->AddrKernelNames.FlatPtr, rc));
738 RTMemFree(pbNames);
739 }
740 else
741 Log(("dbgDiggerLinuxFindTokenIndex: Reading symbol addresses at %RGv failed: %Rrc\n",
742 pThis->AddrKernelAddresses.FlatPtr, rc));
743 RTMemFree(pvAddresses);
744 return rc;
745}
746
747
748/**
749 * Checks if there is a likely kallsyms_names fragment at pHitAddr.
750 *
751 * @returns true if it's a likely fragment, false if not.
752 * @param pUVM The user mode VM handle.
753 * @param pHitAddr The address where paNeedle was found.
754 * @param pabNeedle The fragment we've been searching for.
755 * @param cbNeedle The length of the fragment.
756 */
757static bool dbgDiggerLinuxIsLikelyNameFragment(PUVM pUVM, PCDBGFADDRESS pHitAddr, uint8_t const *pabNeedle, uint8_t cbNeedle)
758{
759 /*
760 * Examples of lead and tail bytes of our choosen needle in a randomly
761 * picked kernel:
762 * k o b j
763 * 22 6b 6f 62 6a aa
764 * fc 6b 6f 62 6a aa
765 * 82 6b 6f 62 6a 5f - ascii trail byte (_).
766 * ee 6b 6f 62 6a aa
767 * fc 6b 6f 62 6a 5f - ascii trail byte (_).
768 * 0a 74 6b 6f 62 6a 5f ea - ascii lead (t) and trail (_) bytes.
769 * 0b 54 6b 6f 62 6a aa - ascii lead byte (T).
770 * ... omitting 29 samples similar to the last two ...
771 * d8 6b 6f 62 6a aa
772 * d8 6b 6f 62 6a aa
773 * d8 6b 6f 62 6a aa
774 * d8 6b 6f 62 6a aa
775 * f9 5f 6b 6f 62 6a 5f 94 - ascii lead and trail bytes (_)
776 * f9 5f 6b 6f 62 6a 0c - ascii lead byte (_).
777 * fd 6b 6f 62 6a 0f
778 * ... enough.
779 */
780 uint8_t abBuf[32];
781 DBGFADDRESS ReadAddr = *pHitAddr;
782 DBGFR3AddrSub(&ReadAddr, 2);
783 int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &ReadAddr, abBuf, 2 + cbNeedle + 2);
784 if (RT_SUCCESS(rc))
785 {
786 if (memcmp(&abBuf[2], pabNeedle, cbNeedle) == 0) /* paranoia */
787 {
788 uint8_t const bLead = abBuf[1] == '_' || abBuf[1] == 'T' || abBuf[1] == 't' ? abBuf[0] : abBuf[1];
789 uint8_t const offTail = 2 + cbNeedle;
790 uint8_t const bTail = abBuf[offTail] == '_' ? abBuf[offTail] : abBuf[offTail + 1];
791 if ( bLead >= 1 && (bLead < 0x20 || bLead >= 0x80)
792 && bTail >= 1 && (bTail < 0x20 || bTail >= 0x80))
793 return true;
794 Log(("dbgDiggerLinuxIsLikelyNameFragment: failed at %RGv: bLead=%#x bTail=%#x (offTail=%#x)\n",
795 pHitAddr->FlatPtr, bLead, bTail, offTail));
796 }
797 else
798 Log(("dbgDiggerLinuxIsLikelyNameFragment: failed at %RGv: Needle changed!\n", pHitAddr->FlatPtr));
799 }
800 else
801 Log(("dbgDiggerLinuxIsLikelyNameFragment: failed at %RGv: %Rrc\n", pHitAddr->FlatPtr, rc));
802
803 return false;
804}
805
806
807/**
808 * @copydoc DBGFOSREG::pfnInit
809 */
810static DECLCALLBACK(int) dbgDiggerLinuxInit(PUVM pUVM, void *pvData)
811{
812 PDBGDIGGERLINUX pThis = (PDBGDIGGERLINUX)pvData;
813 Assert(!pThis->fValid);
814
815 /*
816 * Assume 64-bit kernels all live way beyond 32-bit address space.
817 */
818 pThis->f64Bit = pThis->AddrLinuxBanner.FlatPtr > UINT32_MAX;
819
820 /*
821 * Go looking for the kallsyms table. If it's there, it will be somewhere
822 * after the linux_banner symbol, so use it for starting the search.
823 */
824 DBGFADDRESS CurAddr = pThis->AddrLinuxBanner;
825 uint32_t cbLeft = LNX_MAX_KERNEL_SIZE;
826 while (cbLeft > 4096)
827 {
828 static const uint8_t s_abNeedle[] = "kobj";
829 DBGFADDRESS HitAddr;
830 int rc = DBGFR3MemScan(pUVM, 0 /*idCpu*/, &CurAddr, cbLeft, 1 /*uAlign*/,
831 s_abNeedle, sizeof(s_abNeedle) - 1, &HitAddr);
832 if (RT_FAILURE(rc))
833 break;
834 if (dbgDiggerLinuxIsLikelyNameFragment(pUVM, &HitAddr, s_abNeedle, sizeof(s_abNeedle) - 1))
835 {
836 /* There will be another hit near by. */
837 DBGFR3AddrAdd(&HitAddr, 1);
838 rc = DBGFR3MemScan(pUVM, 0 /*idCpu*/, &HitAddr, LNX_MAX_KALLSYMS_NAMES_SIZE, 1 /*uAlign*/,
839 s_abNeedle, sizeof(s_abNeedle) - 1, &HitAddr);
840 if ( RT_SUCCESS(rc)
841 && dbgDiggerLinuxIsLikelyNameFragment(pUVM, &HitAddr, s_abNeedle, sizeof(s_abNeedle) - 1))
842 {
843 /*
844 * We've got a very likely candidate for a location inside kallsyms_names.
845 * Try find the start of it, that is to say, try find kallsyms_num_syms.
846 * kallsyms_num_syms is aligned on sizeof(unsigned long) boundrary
847 */
848 rc = dbgDiggerLinuxFindStartOfNamesAndSymbolCount(pUVM, pThis, &HitAddr);
849 if (RT_SUCCESS(rc))
850 rc = dbgDiggerLinuxFindEndOfNamesAndMore(pUVM, pThis, &HitAddr);
851 if (RT_SUCCESS(rc))
852 rc = dbgDiggerLinuxFindTokenIndex(pUVM, pThis);
853 if (RT_SUCCESS(rc))
854 rc = dbgDiggerLinuxLoadKernelSymbols(pUVM, pThis);
855
856 }
857 }
858
859 /*
860 * Advance.
861 */
862 RTGCUINTPTR cbDistance = HitAddr.FlatPtr - CurAddr.FlatPtr + sizeof(s_abNeedle) - 1;
863 if (RT_UNLIKELY(cbDistance >= cbLeft))
864 {
865 Log(("dbgDiggerLinuxInit: Failed to find kallsyms\n"));
866 break;
867 }
868 cbLeft -= cbDistance;
869 DBGFR3AddrAdd(&CurAddr, cbDistance);
870
871 }
872
873 pThis->fValid = true;
874 return VINF_SUCCESS;
875}
876
877
878/**
879 * @copydoc DBGFOSREG::pfnProbe
880 */
881static DECLCALLBACK(bool) dbgDiggerLinuxProbe(PUVM pUVM, void *pvData)
882{
883 PDBGDIGGERLINUX pThis = (PDBGDIGGERLINUX)pvData;
884
885 /*
886 * Look for "Linux version " at the start of the rodata segment.
887 * Hope that this comes before any message buffer or other similar string.
888 *
889 * Note! Only Linux version 2.x.y, where x in {0..6}.
890 */
891 for (unsigned i = 0; i < RT_ELEMENTS(g_au64LnxKernelAddresses); i++)
892 {
893 DBGFADDRESS KernelAddr;
894 DBGFR3AddrFromFlat(pUVM, &KernelAddr, g_au64LnxKernelAddresses[i]);
895 DBGFADDRESS HitAddr;
896 static const uint8_t s_abLinuxVersion2x[] = "Linux version 2.";
897 int rc = DBGFR3MemScan(pUVM, 0, &KernelAddr, LNX_MAX_KERNEL_SIZE, 1,
898 s_abLinuxVersion2x, sizeof(s_abLinuxVersion2x) - 1, &HitAddr);
899 if (RT_SUCCESS(rc))
900 {
901 char szTmp[128];
902 char const *pszY = &szTmp[sizeof(s_abLinuxVersion2x) - 1];
903 rc = DBGFR3MemReadString(pUVM, 0, &HitAddr, szTmp, sizeof(szTmp));
904 if ( RT_SUCCESS(rc)
905 && *pszY >= '0'
906 && *pszY <= '6')
907 {
908 pThis->AddrKernelBase = KernelAddr;
909 pThis->AddrLinuxBanner = HitAddr;
910 return true;
911 }
912 }
913 static const uint8_t s_abLinuxVersion3x[] = "Linux version 3.";
914 rc = DBGFR3MemScan(pUVM, 0, &KernelAddr, LNX_MAX_KERNEL_SIZE, 1,
915 s_abLinuxVersion3x, sizeof(s_abLinuxVersion3x) - 1, &HitAddr);
916 if (RT_SUCCESS(rc))
917 {
918 char szTmp[128];
919 char const *pszY = &szTmp[sizeof(s_abLinuxVersion3x) - 1];
920 rc = DBGFR3MemReadString(pUVM, 0, &HitAddr, szTmp, sizeof(szTmp));
921 if ( RT_SUCCESS(rc)
922 && *pszY >= '0'
923 && *pszY <= '9')
924 {
925 pThis->AddrKernelBase = KernelAddr;
926 pThis->AddrLinuxBanner = HitAddr;
927 return true;
928 }
929 }
930 }
931 return false;
932}
933
934
935/**
936 * @copydoc DBGFOSREG::pfnDestruct
937 */
938static DECLCALLBACK(void) dbgDiggerLinuxDestruct(PUVM pUVM, void *pvData)
939{
940
941}
942
943
944/**
945 * @copydoc DBGFOSREG::pfnConstruct
946 */
947static DECLCALLBACK(int) dbgDiggerLinuxConstruct(PUVM pUVM, void *pvData)
948{
949 return VINF_SUCCESS;
950}
951
952
953const DBGFOSREG g_DBGDiggerLinux =
954{
955 /* .u32Magic = */ DBGFOSREG_MAGIC,
956 /* .fFlags = */ 0,
957 /* .cbData = */ sizeof(DBGDIGGERLINUX),
958 /* .szName = */ "Linux",
959 /* .pfnConstruct = */ dbgDiggerLinuxConstruct,
960 /* .pfnDestruct = */ dbgDiggerLinuxDestruct,
961 /* .pfnProbe = */ dbgDiggerLinuxProbe,
962 /* .pfnInit = */ dbgDiggerLinuxInit,
963 /* .pfnRefresh = */ dbgDiggerLinuxRefresh,
964 /* .pfnTerm = */ dbgDiggerLinuxTerm,
965 /* .pfnQueryVersion = */ dbgDiggerLinuxQueryVersion,
966 /* .pfnQueryInterface = */ dbgDiggerLinuxQueryInterface,
967 /* .u32EndMagic = */ DBGFOSREG_MAGIC
968};
969
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette