VirtualBox

source: vbox/trunk/src/VBox/Main/src-server/USBIdDatabaseGenerator.cpp@ 59381

Last change on this file since 59381 was 58020, checked in by vboxsync, 9 years ago

duh

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.1 KB
Line 
1/* $Id: USBIdDatabaseGenerator.cpp 58020 2015-10-03 19:49:54Z vboxsync $ */
2/** @file
3 * USB device vendor and product ID database - generator.
4 */
5
6/*
7 * Copyright (C) 2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*********************************************************************************************************************************
19* Header Files *
20*********************************************************************************************************************************/
21#include <stdio.h>
22
23#include <fstream>
24#include <iostream>
25#include <iomanip>
26#include <algorithm>
27#include <map>
28#include <string>
29#include <vector>
30
31#include <iprt/initterm.h>
32#include <iprt/message.h>
33#include <iprt/string.h>
34#include <iprt/stream.h>
35#include "../../Runtime/include/internal/strhash.h" /** @todo make this one public */
36
37#include "../include/USBIdDatabase.h"
38
39
40/** For verbose output. */
41static bool g_fVerbose = false;
42/** Output prefix for informational output. */
43#define INFO_PREF "USBIdDatabaseGenerator: Info: "
44
45
46using namespace std;
47
48static const char * const header =
49 "/** @file\n"
50 " * USB device vendor and product ID database - Autogenerated from <stupid C++ cannot do %s>\n"
51 " */\n"
52 "\n"
53 "/*\n"
54 " * Copyright (C) 2015 Oracle Corporation\n"
55 " *\n"
56 " * This file is part of VirtualBox Open Source Edition(OSE), as\n"
57 " * available from http ://www.virtualbox.org. This file is free software;\n"
58 " * you can redistribute it and / or modify it under the terms of the GNU\n"
59 " * General Public License(GPL) as published by the Free Software\n"
60 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
61 " * VirtualBox OSE distribution.VirtualBox OSE is distributed in the\n"
62 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
63 " */"
64 "\n"
65 "\n"
66 "#include \"USBIdDatabase.h\"\n"
67 "\n";
68static const char * const product_header =
69 "/**\n"
70 " * USB devices aliases array.\n"
71 " * Format: VendorId, ProductId, Vendor Name, Product Name\n"
72 " * The source of the list is http://www.linux-usb.org/usb.ids\n"
73 " */\n"
74 "USBIDDBPROD const USBIdDatabase::s_aProducts[] =\n"
75 "{\n";
76const char *product_part2 =
77 "};\n"
78 "\n"
79 "\nconst USBIDDBSTR USBIdDatabase::s_aProductNames[] =\n"
80 "{\n";
81const char *product_footer =
82 "};\n"
83 "\n"
84 "const size_t USBIdDatabase::s_cProducts = RT_ELEMENTS(USBIdDatabase::s_aProducts);\n";
85
86const char *vendor_header =
87 "\nUSBIDDBVENDOR const USBIdDatabase::s_aVendors[] =\n"
88 "{\n";
89const char *vendor_part2 =
90 "};\n"
91 "\n"
92 "\nconst USBIDDBSTR USBIdDatabase::s_aVendorNames[] =\n"
93 "{\n";
94const char *vendor_footer =
95 "};\n"
96 "\n"
97 "const size_t USBIdDatabase::s_cVendors = RT_ELEMENTS(USBIdDatabase::s_aVendors);\n";
98
99const char *start_block = "# Vendors, devices and interfaces. Please keep sorted.";
100const char *end_block = "# List of known device classes, subclasses and protocols";
101
102
103// error codes (complements RTEXITCODE_XXX).
104#define ERROR_OPEN_FILE (12)
105#define ERROR_IN_PARSE_LINE (13)
106#define ERROR_DUPLICATE_ENTRY (14)
107#define ERROR_WRONG_FILE_FORMAT (15)
108#define ERROR_TOO_MANY_PRODUCTS (16)
109
110
111/**
112 * String that will end up in the string table.
113 */
114struct StrTabString
115{
116 /** The string. */
117 std::string str;
118 /** The string hash value. */
119 uint32_t uHash;
120 /** The string table reference. */
121 USBIDDBSTR StrRef;
122 /** Pointer to the next string reference (same string table entry). */
123 struct StrTabString *pNextRef;
124 /** Pointer to the next string with the same hash value (collision). */
125 struct StrTabString *pNextCollision;
126
127 void printRef(ostream &rStrm) const
128 {
129 rStrm << " { 0x" << setfill('0') << setw(6) << hex << StrRef.off << ", 0x"
130 << setfill('0') << setw(2) << hex << StrRef.cch << " }, ";
131 }
132
133 void printRefLine(ostream &rStrm) const
134 {
135 printRef(rStrm);
136 rStrm << endl;
137 }
138};
139typedef struct StrTabString *PSTRTABSTRING;
140
141struct VendorRecord
142{
143 size_t vendorID;
144 size_t iProduct;
145 size_t cProducts;
146 StrTabString vendor;
147};
148
149struct ProductRecord
150{
151 size_t key;
152 size_t vendorID;
153 size_t productID;
154 StrTabString product;
155};
156
157bool operator < (const ProductRecord& lh, const ProductRecord& rh)
158{
159 return lh.key < rh.key;
160}
161
162bool operator < (const VendorRecord& lh, const VendorRecord& rh)
163{
164 return lh.vendorID < rh.vendorID;
165}
166
167bool operator == (const ProductRecord& lh, const ProductRecord& rh)
168{
169 return lh.key == rh.key;
170}
171
172bool operator == (const VendorRecord& lh, const VendorRecord& rh)
173{
174 return lh.vendorID == rh.vendorID;
175}
176
177ostream& operator <<(ostream& stream, const ProductRecord product)
178{
179 stream << " { 0x" << setfill('0') << setw(4) << product.productID << " }, " << endl;
180 return stream;
181}
182
183ostream& operator <<(ostream& stream, const VendorRecord vendor)
184{
185 stream << " { 0x" << setfill('0') << setw(4) << hex << vendor.vendorID
186 << ", 0x" << setfill('0') << setw(4) << hex << vendor.iProduct
187 << ", 0x" << setfill('0') << setw(4) << hex << vendor.cProducts << " }, " << endl;
188 return stream;
189}
190
191namespace State
192{
193 typedef int Value;
194 enum
195 {
196 lookForStartBlock,
197 lookForEndBlock,
198 finished
199 };
200}
201
202typedef vector<ProductRecord> ProductsSet;
203typedef vector<VendorRecord> VendorsSet;
204ProductsSet g_products;
205VendorsSet g_vendors;
206
207
208
209/*
210 * String "compression". We replace the 127 most used words with references.
211 */
212#ifdef USB_ID_DATABASE_WITH_COMPRESSION
213
214typedef std::map<std::string, size_t> WORDFREQMAP;
215typedef WORDFREQMAP::value_type WORDFREQPAIR;
216
217/** The 127 words we've picked to be indexed by reference. */
218static StrTabString g_aCompDict[127];
219
220/**
221 * For sorting the frequency fidning in descending order.
222 *
223 * Comparison operators are put outside to make older gcc versions (like 4.1.1
224 * on lnx64-rel) happy.
225 */
226class WordFreqSortEntry
227{
228public:
229 WORDFREQPAIR const *m_pPair;
230
231public:
232 WordFreqSortEntry(WORDFREQPAIR const *pPair) : m_pPair(pPair) {}
233};
234
235bool operator == (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
236{
237 return rLeft.m_pPair->second == rRight.m_pPair->second;
238}
239
240bool operator < (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
241{
242 return rLeft.m_pPair->second > rRight.m_pPair->second;
243}
244
245
246/**
247 * Replaces the dictionary words and escapes non-ascii chars in a string.
248 *
249 * @param pString The string to fixup.
250 * @param pcchOld The old string length is added to this (stats)
251 * @param pcchNew The new string length is added to this (stats)
252 */
253static void FixupString(std::string *pString, size_t *pcchOld, size_t *pcchNew)
254{
255 char szNew[USB_ID_DATABASE_MAX_STRING * 2];
256 char *pszDst = szNew;
257 const char *pszSrc = pString->c_str();
258 const char *pszSrcEnd = strchr(pszSrc, '\0');
259
260 *pcchOld += pszSrcEnd - pszSrc;
261
262 char ch;
263 while ((ch = *pszSrc) != '\0')
264 {
265 /* Spaces. */
266 while (ch == ' ')
267 {
268 *pszDst++ = ' ';
269 ch = *++pszSrc;
270 }
271 if (!ch)
272 break;
273
274 /* Find the end of the current word. */
275 size_t cchWord = 1;
276 while ((ch = pszSrc[cchWord]) != ' ' && ch != '\0')
277 cchWord++;
278
279 /* Check for g_aWord matches. */
280 size_t cchMax = pszSrcEnd - pszSrc;
281 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
282 {
283 size_t cchLen = g_aCompDict[i].str.length();
284 if ( cchLen >= cchWord
285 && cchLen <= cchMax
286 && g_aCompDict[i].str.compare(0, cchLen, pszSrc, cchLen) == 0)
287 {
288 *pszDst++ = (unsigned char)(0x80 | i);
289 pszSrc += cchLen;
290 cchWord = 0;
291 break;
292 }
293 }
294
295 if (cchWord)
296 {
297 /* Copy the current word. */
298 ch = *pszSrc;
299 do
300 {
301 if (!((unsigned char)ch & 0x80))
302 {
303 *pszDst++ = ch;
304 pszSrc++;
305 }
306 else
307 {
308 RTUNICP uc;
309 int rc = RTStrGetCpEx(&pszSrc, &uc);
310 if (RT_SUCCESS(rc))
311 {
312 *pszDst++ = (unsigned char)0xff; /* escape single code point. */
313 pszDst = RTStrPutCp(pszDst, uc);
314 }
315 else
316 {
317 cerr << "Error: RTStrGetCpEx failed with rc=" << rc << endl;
318 exit(3);
319 }
320 }
321 } while ((ch = *pszSrc) != '\0' && ch != ' ');
322 }
323 }
324 *pszDst = '\0';
325 *pcchNew += pszDst - &szNew[0];
326
327 *pString = szNew;
328}
329
330
331/**
332 * Analyzes a string.
333 *
334 * @param pFrequencies The word frequency map.
335 * @param rString The string to analyze.
336 */
337static void AnalyzeString(WORDFREQMAP *pFrequencies, std::string const &rString)
338{
339 const char *psz = rString.c_str();
340
341 /*
342 * For now we just consider words.
343 */
344 char ch;
345 while ((ch = *psz) != '\0')
346 {
347 /* Skip leading spaces. */
348 while (ch == ' ')
349 ch = *++psz;
350 if (!ch)
351 return;
352
353 /* Find end of word. */
354 size_t cchWord = 1;
355 while ((ch = psz[cchWord]) != ' ' && ch != '\0')
356 cchWord++;
357 if (cchWord > 1)
358 {
359 std::string strWord(psz, cchWord);
360 WORDFREQMAP::iterator it = pFrequencies->find(strWord);
361 if (it != pFrequencies->end())
362 it->second += cchWord - 1;
363 else
364 (*pFrequencies)[strWord] = 0;
365 /** @todo could gain hits by including the space after the word, but that
366 * has the same accounting problems as the two words scenario below. */
367
368# if 0 /** @todo need better accounting for overlapping alternatives before this can be enabled. */
369 /* Two words - immediate yields calc may lie when this enabled and we may pick the wrong words. */
370 if (ch == ' ')
371 {
372 ch = psz[++cchWord];
373 if (ch != ' ' && ch != '\0')
374 {
375 size_t const cchSaved = cchWord;
376
377 do
378 cchWord++;
379 while ((ch = psz[cchWord]) != ' ' && ch != '\0');
380
381 strWord = std::string(psz, cchWord);
382 WORDFREQMAP::iterator it = pFrequencies->find(strWord);
383 if (it != pFrequencies->end())
384 it->second += cchWord - 1;
385 else
386 (*pFrequencies)[strWord] = 0;
387
388 cchWord = cchSaved;
389 }
390 }
391# endif
392 }
393
394 /* Advance. */
395 psz += cchWord;
396 }
397}
398
399
400/**
401 * Compresses the vendor and product strings.
402 *
403 * This is very very simple (a lot less work that the string table for
404 * instance).
405 */
406static void DoStringCompression(void)
407{
408 /*
409 * Analyze the strings collecting stats on potential sequences to replace.
410 */
411 WORDFREQMAP Frequencies;
412
413 uint32_t cProducts = 0;
414 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it, cProducts++)
415 AnalyzeString(&Frequencies, it->product.str);
416
417 uint32_t cVendors = 0;
418 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it, cVendors++)
419 AnalyzeString(&Frequencies, it->vendor.str);
420
421 if (g_fVerbose)
422 {
423 size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
424 size_t const cbVendors = cVendors * cbVendorEntry;
425 cout << INFO_PREF << cVendors << " vendors (" << cbVendors << " bytes)" << endl;
426
427 size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
428 size_t const cbProducts = cProducts * cbProductEntry;
429 cout << INFO_PREF << cProducts << " products (" << cbProducts << " bytes)" << endl;
430 }
431
432 /*
433 * Sort the result and pick the top 127 ones.
434 */
435 std::vector<WordFreqSortEntry> SortMap;
436 for (WORDFREQMAP::iterator it = Frequencies.begin(); it != Frequencies.end(); ++it)
437 {
438 WORDFREQPAIR const &rPair = *it;
439 SortMap.push_back(WordFreqSortEntry(&rPair));
440 }
441
442 sort(SortMap.begin(), SortMap.end());
443
444 size_t cb = 0;
445 unsigned i = 0;
446 for (std::vector<WordFreqSortEntry>::iterator it = SortMap.begin();
447 it != SortMap.end() && i < RT_ELEMENTS(g_aCompDict);
448 ++it, i++)
449 {
450 g_aCompDict[i].str = it->m_pPair->first;
451 cb += it->m_pPair->second;
452 }
453
454 if (g_fVerbose)
455 cout << INFO_PREF "Estimated compression saving " << cb << " bytes" << endl;
456
457 /*
458 * Rework the strings.
459 */
460 size_t cchNew = 0;
461 size_t cchOld = 0;
462 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
463 FixupString(&it->product.str, &cchOld, &cchNew);
464 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
465 FixupString(&it->vendor.str, &cchOld, &cchNew);
466
467 for (i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
468 cchNew += g_aCompDict[i].str.length() + 1;
469
470 if (g_fVerbose)
471 {
472 cout << INFO_PREF "Strings: original: " << cchOld << " bytes; compressed: " << cchNew << " bytes;";
473 if (cchNew < cchOld)
474 cout << " saving " << (cchOld - cchNew) << " bytes (" << ((cchOld - cchNew) * 100 / cchOld) << "%)" << endl;
475 else
476 cout << " wasting " << (cchOld - cchNew) << " bytes!" << endl;
477 cout << INFO_PREF "Average string length is " << (cchOld / (cVendors + cProducts)) << endl;
478 }
479}
480
481
482/**
483 * Writes the compression dictionary to the output stream.
484 *
485 * @param rStrm The output stream.
486 */
487static void WriteCompressionDictionary(ostream &rStrm)
488{
489 rStrm << "const USBIDDBSTR USBIdDatabase::s_aCompDict[" << dec << RT_ELEMENTS(g_aCompDict) << "] = " << endl;
490 rStrm << "{" << endl;
491 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
492 {
493 g_aCompDict[i].printRef(rStrm);
494 rStrm << " // " << g_aCompDict[i].str << endl;
495 }
496 rStrm << "};" << endl << endl;
497}
498
499#endif /* USB_ID_DATABASE_WITH_COMPRESSION */
500
501
502/*
503 * Compile a string table.
504 */
505
506/** The size of g_papStrHash. */
507static size_t g_cStrHash = 0;
508/** String hash table. */
509static PSTRTABSTRING *g_papStrHash = NULL;
510/** Duplicate strings found by AddString. */
511static size_t g_cDuplicateStrings = 0;
512/** Total length of the unique strings (no terminators). */
513static size_t g_cchUniqueStrings = 0;
514/** Number of unique strings after AddString. */
515static size_t g_cUniqueStrings = 0;
516/** Number of collisions. */
517static size_t g_cCollisions = 0;
518
519/** Number of entries in g_apSortedStrings. */
520static size_t g_cSortedStrings = 0;
521/** The sorted string table. */
522static PSTRTABSTRING *g_papSortedStrings = NULL;
523
524/** The string table. */
525static char *g_pachStrTab = NULL;
526/** The actual string table size. */
527static size_t g_cchStrTab = 0;
528
529
530/**
531 * Adds a string to the hash table.
532 * @param pStr The string.
533 */
534static void AddString(PSTRTABSTRING pStr)
535{
536 pStr->pNextRef = NULL;
537 pStr->pNextCollision = NULL;
538 pStr->StrRef.off = 0;
539 pStr->StrRef.cch = pStr->str.length();
540 size_t cchIgnored;
541 pStr->uHash = sdbm(pStr->str.c_str(), &cchIgnored);
542 Assert(cchIgnored == pStr->str.length());
543
544 size_t idxHash = pStr->uHash % g_cStrHash;
545 PSTRTABSTRING pCur = g_papStrHash[idxHash];
546 if (!pCur)
547 g_papStrHash[idxHash] = pStr;
548 else
549 {
550 /* Look for matching string. */
551 do
552 {
553 if ( pCur->uHash == pStr->uHash
554 && pCur->StrRef.cch == pStr->StrRef.cch
555 && pCur->str == pStr->str)
556 {
557 pStr->pNextRef = pCur->pNextRef;
558 pCur->pNextRef = pStr;
559 g_cDuplicateStrings++;
560 return;
561 }
562 pCur = pCur->pNextCollision;
563 } while (pCur != NULL);
564
565 /* No matching string, insert. */
566 g_cCollisions++;
567 pStr->pNextCollision = g_papStrHash[idxHash];
568 g_papStrHash[idxHash] = pStr;
569 }
570
571 g_cUniqueStrings++;
572 g_cchUniqueStrings += pStr->StrRef.cch;
573}
574
575
576/**
577 * Inserts a string into g_apUniqueStrings.
578 * @param pStr The string.
579 */
580static void InsertUniqueString(PSTRTABSTRING pStr)
581{
582 size_t iIdx;
583 size_t iEnd = g_cSortedStrings;
584 if (iEnd)
585 {
586 size_t iStart = 0;
587 for (;;)
588 {
589 iIdx = iStart + (iEnd - iStart) / 2;
590 if (g_papSortedStrings[iIdx]->StrRef.cch < pStr->StrRef.cch)
591 {
592 if (iIdx <= iStart)
593 break;
594 iEnd = iIdx;
595 }
596 else if (g_papSortedStrings[iIdx]->StrRef.cch > pStr->StrRef.cch)
597 {
598 if (++iIdx >= iEnd)
599 break;
600 iStart = iIdx;
601 }
602 else
603 break;
604 }
605
606 if (iIdx != g_cSortedStrings)
607 memmove(&g_papSortedStrings[iIdx + 1], &g_papSortedStrings[iIdx],
608 (g_cSortedStrings - iIdx) * sizeof(g_papSortedStrings[iIdx]));
609 }
610 else
611 iIdx = 0;
612
613 g_papSortedStrings[iIdx] = pStr;
614 g_cSortedStrings++;
615}
616
617
618/**
619 * Creates a string table.
620 *
621 * This will save space by dropping string terminators, eliminating duplicates
622 * and try find strings that are sub-strings of others.
623 *
624 * Will initialize the StrRef of all StrTabString instances.
625 */
626static void CreateStringTable(void)
627{
628 /*
629 * Allocate a hash table double the size of all strings (to avoid too
630 * many collisions). Add all strings to it, finding duplicates in the
631 * process.
632 */
633 size_t cMaxStrings = g_products.size() + g_vendors.size();
634#ifdef USB_ID_DATABASE_WITH_COMPRESSION
635 cMaxStrings += RT_ELEMENTS(g_aCompDict);
636#endif
637 cMaxStrings *= 2;
638 g_papStrHash = new PSTRTABSTRING[cMaxStrings];
639 g_cStrHash = cMaxStrings;
640 memset(g_papStrHash, 0, cMaxStrings * sizeof(g_papStrHash[0]));
641
642 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
643 AddString(&it->product);
644 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
645 AddString(&it->vendor);
646#ifdef USB_ID_DATABASE_WITH_COMPRESSION
647 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
648 AddString(&g_aCompDict[i]);
649#endif
650 if (g_fVerbose)
651 cout << INFO_PREF "" << g_cUniqueStrings << " unique string (" << g_cchUniqueStrings << " bytes), "
652 << g_cDuplicateStrings << " duplicates, " << g_cCollisions << " collisions" << endl;
653
654 /*
655 * Create g_papSortedStrings from the hash table. The table is sorted by
656 * string length, with the longer strings first.
657 */
658 g_papSortedStrings = new PSTRTABSTRING[g_cUniqueStrings];
659 g_cSortedStrings = 0;
660 size_t idxHash = g_cStrHash;
661 while (idxHash-- > 0)
662 {
663 PSTRTABSTRING pCur = g_papStrHash[idxHash];
664 if (pCur)
665 {
666 do
667 {
668 InsertUniqueString(pCur);
669 pCur = pCur->pNextCollision;
670 } while (pCur);
671 }
672 }
673
674 /*
675 * Create the actual string table.
676 */
677 g_pachStrTab = new char [g_cchUniqueStrings + 1];
678 g_cchStrTab = 0;
679 for (size_t i = 0; i < g_cSortedStrings; i++)
680 {
681 PSTRTABSTRING pCur = g_papSortedStrings[i];
682 const char * const pszCur = pCur->str.c_str();
683 size_t const cchCur = pCur->StrRef.cch;
684 size_t offStrTab = g_cchStrTab;
685
686 /*
687 * See if the string is a substring already found in the string table.
688 * Excluding the zero terminator increases the chances for this.
689 */
690 size_t cchLeft = g_cchStrTab >= cchCur ? g_cchStrTab - cchCur : 0;
691 const char *pchLeft = g_pachStrTab;
692 char const chFirst = *pszCur;
693 while (cchLeft > 0)
694 {
695 const char *pchCandidate = (const char *)memchr(pchLeft, chFirst, cchLeft);
696 if (!pchCandidate)
697 break;
698 if (memcmp(pchCandidate, pszCur, cchCur) == 0)
699 {
700 offStrTab = pchCandidate - g_pachStrTab;
701 break;
702 }
703
704 cchLeft -= pchCandidate + 1 - pchLeft;
705 pchLeft = pchCandidate + 1;
706 }
707
708 if (offStrTab == g_cchStrTab)
709 {
710 /*
711 * See if the start of the string overlaps the end of the string table.
712 */
713 if (g_cchStrTab && cchCur > 1)
714 {
715 cchLeft = RT_MIN(g_cchStrTab, cchCur - 1);
716 pchLeft = &g_pachStrTab[g_cchStrTab - cchLeft];
717 while (cchLeft > 0)
718 {
719 const char *pchCandidate = (const char *)memchr(pchLeft, chFirst, cchLeft);
720 if (!pchCandidate)
721 break;
722 cchLeft -= pchCandidate - pchLeft;
723 pchLeft = pchCandidate;
724 if (memcmp(pchLeft, pszCur, cchLeft) == 0)
725 {
726 size_t cchToCopy = cchCur - cchLeft;
727 memcpy(&g_pachStrTab[offStrTab], &pszCur[cchLeft], cchToCopy);
728 g_cchStrTab += cchToCopy;
729 offStrTab = pchCandidate - g_pachStrTab;
730 break;
731 }
732 cchLeft--;
733 pchLeft++;
734 }
735 }
736
737 /*
738 * If we didn't have any luck above, just append the string.
739 */
740 if (offStrTab == g_cchStrTab)
741 {
742 memcpy(&g_pachStrTab[offStrTab], pszCur, cchCur);
743 g_cchStrTab += cchCur;
744 }
745 }
746
747 /*
748 * Set the string table offset for all the references to this string.
749 */
750 do
751 {
752 pCur->StrRef.off = (uint32_t)offStrTab;
753 pCur = pCur->pNextRef;
754 } while (pCur != NULL);
755 }
756
757 if (g_fVerbose)
758 cout << INFO_PREF "String table: " << g_cchStrTab << " bytes" << endl;
759}
760
761
762#ifdef VBOX_STRICT
763/**
764 * Sanity checks a string table string.
765 * @param pStr The string to check.
766 */
767static void CheckStrTabString(PSTRTABSTRING pStr)
768{
769 Assert(pStr->StrRef.cch == pStr->str.length());
770 Assert(pStr->StrRef.off < g_cchStrTab);
771 Assert(pStr->StrRef.off + pStr->StrRef.cch <= g_cchStrTab);
772 Assert(memcmp(pStr->str.c_str(), &g_pachStrTab[pStr->StrRef.off], pStr->str.length()) == 0);
773}
774#endif
775
776
777/**
778 * Writes the string table code to the output stream.
779 *
780 * @param rStrm The output stream.
781 */
782static void WriteStringTable(ostream &rStrm)
783{
784#ifdef VBOX_STRICT
785 /*
786 * Do some quick sanity checks while we're here.
787 */
788 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
789 CheckStrTabString(&it->product);
790 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
791 CheckStrTabString(&it->vendor);
792# ifdef USB_ID_DATABASE_WITH_COMPRESSION
793 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
794 CheckStrTabString(&g_aCompDict[i]);
795# endif
796#endif
797
798 /*
799 * Create a table for speeding up the character categorization.
800 */
801 uint8_t abCharCat[256];
802 RT_ZERO(abCharCat);
803 abCharCat[(unsigned char)'\\'] = 1;
804 abCharCat[(unsigned char)'\''] = 1;
805 for (unsigned i = 0; i < 0x20; i++)
806 abCharCat[i] = 2;
807 for (unsigned i = 0x7f; i < 0x100; i++)
808 abCharCat[i] = 2;
809
810 /*
811 * We follow the sorted string table, one string per line.
812 */
813 rStrm << endl;
814 rStrm << "const size_t USBIdDatabase::s_cchStrTab = " << g_cchStrTab << ";" << endl;
815 rStrm << "const char USBIdDatabase::s_achStrTab[] =" << endl;
816 rStrm << "{" << endl;
817
818 uint32_t off = 0;
819 for (uint32_t i = 0; i < g_cSortedStrings; i++)
820 {
821 PSTRTABSTRING pCur = g_papSortedStrings[i];
822 uint32_t offEnd = pCur->StrRef.off + pCur->StrRef.cch;
823 if (offEnd > off)
824 {
825 /* Comment with a more readable version of the string. */
826 if (off == pCur->StrRef.off)
827 rStrm << " /* 0x";
828 else
829 rStrm << " /* 0X";
830 rStrm << hex << setfill('0') << setw(5) << off << " = \"";
831 for (uint32_t offTmp = off; offTmp < offEnd; offTmp++)
832 {
833 unsigned char uch = g_pachStrTab[offTmp];
834 if (abCharCat[uch] == 0)
835 rStrm << (char)uch;
836 else if (abCharCat[uch] != 1)
837 rStrm << "\\x" << setw(2) << hex << (size_t)uch;
838 else
839 rStrm << "\\" << (char)uch;
840 }
841 rStrm << "\" */" << endl;
842
843 /* Must use char by char here or we may trigger the max string
844 length limit in the compiler, */
845 rStrm << " ";
846 for (; off < offEnd; off++)
847 {
848 unsigned char uch = g_pachStrTab[off];
849 rStrm << "'";
850 if (abCharCat[uch] == 0)
851 rStrm << (char)uch;
852 else if (abCharCat[uch] != 1)
853 rStrm << "\\x" << setw(2) << hex << (size_t)uch;
854 else
855 rStrm << "\\" << (char)uch;
856 rStrm << "',";
857 }
858 rStrm << endl;
859 }
860 }
861
862 rStrm << "};" << endl;
863 rStrm << "AssertCompile(sizeof(USBIdDatabase::s_achStrTab) == 0x" << hex << g_cchStrTab << ");" << endl << endl;
864}
865
866
867/*
868 * Input file parsing.
869 */
870
871/** The size of all the raw strings, including terminators. */
872static size_t g_cbRawStrings = 0;
873
874int ParseAlias(const string& src, size_t& id, string& desc)
875{
876 unsigned int i = 0;
877 if (sscanf(src.c_str(), "%x", &i) != 1)
878 return ERROR_IN_PARSE_LINE;
879
880 /* skip the number and following whitespace. */
881 size_t offNext = src.find_first_of(" \t", 1);
882 offNext = src.find_first_not_of(" \t", offNext);
883 if (offNext != string::npos)
884 {
885 size_t cchLength = src.length() - offNext;
886 if (cchLength <= USB_ID_DATABASE_MAX_STRING)
887 {
888 id = i;
889 desc = src.substr(offNext);
890
891 /* Check the string encoding. */
892 int rc = RTStrValidateEncoding(desc.c_str());
893 if (RT_SUCCESS(rc))
894 {
895 g_cbRawStrings += desc.length() + 1;
896 return RTEXITCODE_SUCCESS;
897 }
898
899 cerr << "Error: Invalid encoding: '" << desc << "' (rc=" << rc << ")" << endl;
900 }
901 cerr << "Error: String to long (" << cchLength << ")" << endl;
902 }
903 else
904 cerr << "Error: Error parsing \"" << src << "\"" << endl;
905 return ERROR_IN_PARSE_LINE;
906}
907
908bool IsCommentOrEmptyLine(const string& str)
909{
910 size_t index = str.find_first_not_of(" \t");// skip left spaces
911 return index == string::npos || str[index] == '#';
912}
913
914bool getline(PRTSTREAM instream, string& resString)
915{
916 const size_t szBuf = 4096;
917 char buf[szBuf] = { 0 };
918
919 int rc = RTStrmGetLine(instream, buf, szBuf);
920 if (RT_SUCCESS(rc))
921 {
922 resString = buf;
923 return true;
924 }
925 else if (rc != VERR_EOF)
926 {
927 cerr << "Warning: Invalid line in file. Error: " << rc << endl;
928 }
929 return false;
930}
931
932int ParseUsbIds(PRTSTREAM instream)
933{
934 State::Value state = State::lookForStartBlock;
935 string line;
936 int res = 0;
937 VendorRecord vendor = { 0, 0, 0, "" };
938
939 while (state != State::finished && getline(instream, line))
940 {
941 switch (state)
942 {
943 case State::lookForStartBlock:
944 {
945 if (line.find(start_block) != string::npos)
946 state = State::lookForEndBlock;
947 break;
948 }
949 case State::lookForEndBlock:
950 {
951 if (line.find(end_block) != string::npos)
952 state = State::finished;
953 else
954 {
955 if (!IsCommentOrEmptyLine(line))
956 {
957 if (line[0] == '\t')
958 {
959 // Parse Product line
960 // first line should be vendor
961 if (vendor.vendorID == 0)
962 {
963 cerr << "Wrong file format. Product before vendor: " << line.c_str() << "'" << endl;
964 return ERROR_WRONG_FILE_FORMAT;
965 }
966 ProductRecord product = { 0, vendor.vendorID, 0, "" };
967 if (ParseAlias(line.substr(1), product.productID, product.product.str) != 0)
968 {
969 cerr << "Error in parsing product line: '" << line.c_str() << "'" << endl;
970 return ERROR_IN_PARSE_LINE;
971 }
972 product.key = RT_MAKE_U32(product.productID, product.vendorID);
973 Assert(product.vendorID == vendor.vendorID);
974 g_products.push_back(product);
975 }
976 else
977 {
978 // Parse vendor line
979 if (ParseAlias(line, vendor.vendorID, vendor.vendor.str) != 0)
980 {
981 cerr << "Error in parsing vendor line: '"
982 << line.c_str() << "'" << endl;
983 return ERROR_IN_PARSE_LINE;
984 }
985 g_vendors.push_back(vendor);
986 }
987 }
988 }
989 break;
990 }
991 }
992 }
993 if (state == State::lookForStartBlock)
994 {
995 cerr << "Error: wrong format of input file. Start line is not found." << endl;
996 return ERROR_WRONG_FILE_FORMAT;
997 }
998 return 0;
999}
1000
1001
1002static int usage(ostream &rOut, const char *argv0)
1003{
1004 rOut << "Usage: " << argv0
1005 << " [linux.org usb list file] [custom usb list file] [-o output file]" << endl;
1006 return RTEXITCODE_SYNTAX;
1007}
1008
1009int main(int argc, char *argv[])
1010{
1011 /*
1012 * Initialize IPRT and convert argv to UTF-8.
1013 */
1014 int rc = RTR3InitExe(argc, &argv, 0);
1015 if (RT_FAILURE(rc))
1016 return RTMsgInitFailure(rc);
1017
1018 /*
1019 * Parse arguments and read input files.
1020 */
1021 if (argc < 4)
1022 {
1023 usage(cerr, argv[0]);
1024 cerr << "Error: Not enough arguments." << endl;
1025 return RTEXITCODE_SYNTAX;
1026 }
1027 ofstream fout;
1028 PRTSTREAM fin;
1029 g_products.reserve(20000);
1030 g_vendors.reserve(3500);
1031
1032 const char *outName = NULL;
1033 for (int i = 1; i < argc; i++)
1034 {
1035 if (strcmp(argv[i], "-o") == 0)
1036 {
1037 outName = argv[++i];
1038 continue;
1039 }
1040 if ( strcmp(argv[i], "-h") == 0
1041 || strcmp(argv[i], "-?") == 0
1042 || strcmp(argv[i], "--help") == 0)
1043 {
1044 usage(cout, argv[0]);
1045 return RTEXITCODE_SUCCESS;
1046 }
1047
1048 rc = RTStrmOpen(argv[i], "r", &fin);
1049 if (RT_FAILURE(rc))
1050 {
1051 cerr << "Error: Failed to open file '" << argv[i] << "' for reading. rc=" << rc << endl;
1052 return ERROR_OPEN_FILE;
1053 }
1054
1055 rc = ParseUsbIds(fin);
1056 if (rc != 0)
1057 {
1058 cerr << "Error: Failed parsing USB devices file '" << argv[i] << "'" << endl;
1059 RTStrmClose(fin);
1060 return rc;
1061 }
1062 RTStrmClose(fin);
1063 }
1064
1065 /*
1066 * Due to USBIDDBVENDOR::iProduct, there is currently a max of 64KB products.
1067 * (Not a problem as we've only have less that 54K products currently.)
1068 */
1069 if (g_products.size() > _64K)
1070 {
1071 cerr << "Error: More than 64K products is not supported (input: " << g_products.size() << ")" << endl;
1072 return ERROR_TOO_MANY_PRODUCTS;
1073 }
1074
1075 /*
1076 * Sort the IDs and fill in the iProduct and cProduct members.
1077 */
1078 sort(g_products.begin(), g_products.end());
1079 sort(g_vendors.begin(), g_vendors.end());
1080
1081 size_t iProduct = 0;
1082 for (size_t iVendor = 0; iVendor < g_vendors.size(); iVendor++)
1083 {
1084 size_t const idVendor = g_vendors[iVendor].vendorID;
1085 g_vendors[iVendor].iProduct = iProduct;
1086 if ( iProduct < g_products.size()
1087 && g_products[iProduct].vendorID <= idVendor)
1088 {
1089 if (g_products[iProduct].vendorID == idVendor)
1090 do
1091 iProduct++;
1092 while ( iProduct < g_products.size()
1093 && g_products[iProduct].vendorID == idVendor);
1094 else
1095 {
1096 cerr << "Error: product without vendor after sorting. impossible!" << endl;
1097 return ERROR_IN_PARSE_LINE;
1098 }
1099 }
1100 g_vendors[iVendor].cProducts = iProduct - g_vendors[iVendor].iProduct;
1101 }
1102
1103 /*
1104 * Verify that all IDs are unique.
1105 */
1106 ProductsSet::iterator ita = adjacent_find(g_products.begin(), g_products.end());
1107 if (ita != g_products.end())
1108 {
1109 cerr << "Error: Duplicate alias detected. " << *ita << endl;
1110 return ERROR_DUPLICATE_ENTRY;
1111 }
1112
1113 /*
1114 * Do string compression and create the string table.
1115 */
1116#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1117 DoStringCompression();
1118#endif
1119 CreateStringTable();
1120
1121 /*
1122 * Print stats.
1123 */
1124 size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
1125 size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
1126
1127 size_t cbOldRaw = (g_products.size() + g_vendors.size()) * sizeof(const char *) * 2 + g_cbRawStrings;
1128 size_t cbRaw = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cbRawStrings;
1129 size_t cbActual = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cchStrTab;
1130#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1131 cbActual += sizeof(USBIdDatabase::s_aCompDict);
1132#endif
1133 cout << INFO_PREF "Total " << dec << cbActual << " bytes";
1134 if (cbActual < cbRaw)
1135 cout << " saving " << dec << ((cbRaw - cbActual) * 100 / cbRaw) << "% (" << (cbRaw - cbActual) << " bytes)";
1136 else
1137 cout << " wasting " << dec << (cbActual - cbRaw) << " bytes";
1138 cout << "; old version " << cbOldRaw << " bytes + relocs ("
1139 << ((cbOldRaw - cbActual) * 100 / cbOldRaw) << "% save)." << endl;
1140
1141
1142 /*
1143 * Produce the source file.
1144 */
1145 if (!outName)
1146 {
1147 cerr << "Error: Output file is not specified." << endl;
1148 return ERROR_OPEN_FILE;
1149 }
1150
1151 fout.open(outName);
1152 if (!fout.is_open())
1153 {
1154 cerr << "Error: Can not open file to write '" << outName << "'." << endl;
1155 return ERROR_OPEN_FILE;
1156 }
1157
1158 fout << header;
1159
1160 WriteStringTable(fout);
1161#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1162 WriteCompressionDictionary(fout);
1163#endif
1164
1165 fout << product_header;
1166 for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1167 fout << *itp;
1168 fout << product_part2;
1169 for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1170 itp->product.printRefLine(fout);
1171 fout << product_footer;
1172
1173 fout << vendor_header;
1174 for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1175 fout << *itv;
1176 fout << vendor_part2;
1177 for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1178 itv->vendor.printRefLine(fout);
1179 fout << vendor_footer;
1180
1181 fout.close();
1182
1183
1184 return RTEXITCODE_SUCCESS;
1185}
1186
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette