USBIdDatabaseGenerator.cpp@ 59381

Last change on this file since 59381 was 58020, checked in by vboxsync, 9 years ago
duh
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 36.1 KB

Line
1	/* $Id: USBIdDatabaseGenerator.cpp 58020 2015-10-03 19:49:54Z vboxsync $ */
2	/** @file
3	* USB device vendor and product ID database - generator.
4	*/
5
6	/*
7	* Copyright (C) 2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18	/*********************************************************************************************************************************
19	* Header Files *
20	*********************************************************************************************************************************/
21	#include <stdio.h>
22
23	#include <fstream>
24	#include <iostream>
25	#include <iomanip>
26	#include <algorithm>
27	#include <map>
28	#include <string>
29	#include <vector>
30
31	#include <iprt/initterm.h>
32	#include <iprt/message.h>
33	#include <iprt/string.h>
34	#include <iprt/stream.h>
35	#include "../../Runtime/include/internal/strhash.h" /** @todo make this one public */
36
37	#include "../include/USBIdDatabase.h"
38
39
40	/** For verbose output. */
41	static bool g_fVerbose = false;
42	/** Output prefix for informational output. */
43	#define INFO_PREF "USBIdDatabaseGenerator: Info: "
44
45
46	using namespace std;
47
48	static const char * const header =
49	"/** @file\n"
50	" * USB device vendor and product ID database - Autogenerated from <stupid C++ cannot do %s>\n"
51	" */\n"
52	"\n"
53	"/*\n"
54	" * Copyright (C) 2015 Oracle Corporation\n"
55	" *\n"
56	" * This file is part of VirtualBox Open Source Edition(OSE), as\n"
57	" * available from http ://www.virtualbox.org. This file is free software;\n"
58	" * you can redistribute it and / or modify it under the terms of the GNU\n"
59	" * General Public License(GPL) as published by the Free Software\n"
60	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
61	" * VirtualBox OSE distribution.VirtualBox OSE is distributed in the\n"
62	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
63	" */"
64	"\n"
65	"\n"
66	"#include \"USBIdDatabase.h\"\n"
67	"\n";
68	static const char * const product_header =
69	"/**\n"
70	" * USB devices aliases array.\n"
71	" * Format: VendorId, ProductId, Vendor Name, Product Name\n"
72	" * The source of the list is http://www.linux-usb.org/usb.ids\n"
73	" */\n"
74	"USBIDDBPROD const USBIdDatabase::s_aProducts[] =\n"
75	"{\n";
76	const char *product_part2 =
77	"};\n"
78	"\n"
79	"\nconst USBIDDBSTR USBIdDatabase::s_aProductNames[] =\n"
80	"{\n";
81	const char *product_footer =
82	"};\n"
83	"\n"
84	"const size_t USBIdDatabase::s_cProducts = RT_ELEMENTS(USBIdDatabase::s_aProducts);\n";
85
86	const char *vendor_header =
87	"\nUSBIDDBVENDOR const USBIdDatabase::s_aVendors[] =\n"
88	"{\n";
89	const char *vendor_part2 =
90	"};\n"
91	"\n"
92	"\nconst USBIDDBSTR USBIdDatabase::s_aVendorNames[] =\n"
93	"{\n";
94	const char *vendor_footer =
95	"};\n"
96	"\n"
97	"const size_t USBIdDatabase::s_cVendors = RT_ELEMENTS(USBIdDatabase::s_aVendors);\n";
98
99	const char *start_block = "# Vendors, devices and interfaces. Please keep sorted.";
100	const char *end_block = "# List of known device classes, subclasses and protocols";
101
102
103	// error codes (complements RTEXITCODE_XXX).
104	#define ERROR_OPEN_FILE (12)
105	#define ERROR_IN_PARSE_LINE (13)
106	#define ERROR_DUPLICATE_ENTRY (14)
107	#define ERROR_WRONG_FILE_FORMAT (15)
108	#define ERROR_TOO_MANY_PRODUCTS (16)
109
110
111	/**
112	* String that will end up in the string table.
113	*/
114	struct StrTabString
115	{
116	/** The string. */
117	std::string str;
118	/** The string hash value. */
119	uint32_t uHash;
120	/** The string table reference. */
121	USBIDDBSTR StrRef;
122	/** Pointer to the next string reference (same string table entry). */
123	struct StrTabString *pNextRef;
124	/** Pointer to the next string with the same hash value (collision). */
125	struct StrTabString *pNextCollision;
126
127	void printRef(ostream &rStrm) const
128	{
129	rStrm << " { 0x" << setfill('0') << setw(6) << hex << StrRef.off << ", 0x"
130	<< setfill('0') << setw(2) << hex << StrRef.cch << " }, ";
131	}
132
133	void printRefLine(ostream &rStrm) const
134	{
135	printRef(rStrm);
136	rStrm << endl;
137	}
138	};
139	typedef struct StrTabString *PSTRTABSTRING;
140
141	struct VendorRecord
142	{
143	size_t vendorID;
144	size_t iProduct;
145	size_t cProducts;
146	StrTabString vendor;
147	};
148
149	struct ProductRecord
150	{
151	size_t key;
152	size_t vendorID;
153	size_t productID;
154	StrTabString product;
155	};
156
157	bool operator < (const ProductRecord& lh, const ProductRecord& rh)
158	{
159	return lh.key < rh.key;
160	}
161
162	bool operator < (const VendorRecord& lh, const VendorRecord& rh)
163	{
164	return lh.vendorID < rh.vendorID;
165	}
166
167	bool operator == (const ProductRecord& lh, const ProductRecord& rh)
168	{
169	return lh.key == rh.key;
170	}
171
172	bool operator == (const VendorRecord& lh, const VendorRecord& rh)
173	{
174	return lh.vendorID == rh.vendorID;
175	}
176
177	ostream& operator <<(ostream& stream, const ProductRecord product)
178	{
179	stream << " { 0x" << setfill('0') << setw(4) << product.productID << " }, " << endl;
180	return stream;
181	}
182
183	ostream& operator <<(ostream& stream, const VendorRecord vendor)
184	{
185	stream << " { 0x" << setfill('0') << setw(4) << hex << vendor.vendorID
186	<< ", 0x" << setfill('0') << setw(4) << hex << vendor.iProduct
187	<< ", 0x" << setfill('0') << setw(4) << hex << vendor.cProducts << " }, " << endl;
188	return stream;
189	}
190
191	namespace State
192	{
193	typedef int Value;
194	enum
195	{
196	lookForStartBlock,
197	lookForEndBlock,
198	finished
199	};
200	}
201
202	typedef vector<ProductRecord> ProductsSet;
203	typedef vector<VendorRecord> VendorsSet;
204	ProductsSet g_products;
205	VendorsSet g_vendors;
206
207
208
209	/*
210	* String "compression". We replace the 127 most used words with references.
211	*/
212	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
213
214	typedef std::map<std::string, size_t> WORDFREQMAP;
215	typedef WORDFREQMAP::value_type WORDFREQPAIR;
216
217	/** The 127 words we've picked to be indexed by reference. */
218	static StrTabString g_aCompDict[127];
219
220	/**
221	* For sorting the frequency fidning in descending order.
222	*
223	* Comparison operators are put outside to make older gcc versions (like 4.1.1
224	* on lnx64-rel) happy.
225	*/
226	class WordFreqSortEntry
227	{
228	public:
229	WORDFREQPAIR const *m_pPair;
230
231	public:
232	WordFreqSortEntry(WORDFREQPAIR const *pPair) : m_pPair(pPair) {}
233	};
234
235	bool operator == (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
236	{
237	return rLeft.m_pPair->second == rRight.m_pPair->second;
238	}
239
240	bool operator < (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
241	{
242	return rLeft.m_pPair->second > rRight.m_pPair->second;
243	}
244
245
246	/**
247	* Replaces the dictionary words and escapes non-ascii chars in a string.
248	*
249	* @param pString The string to fixup.
250	* @param pcchOld The old string length is added to this (stats)
251	* @param pcchNew The new string length is added to this (stats)
252	*/
253	static void FixupString(std::string pString, size_t pcchOld, size_t *pcchNew)
254	{
255	char szNew[USB_ID_DATABASE_MAX_STRING * 2];
256	char *pszDst = szNew;
257	const char *pszSrc = pString->c_str();
258	const char *pszSrcEnd = strchr(pszSrc, '\0');
259
260	*pcchOld += pszSrcEnd - pszSrc;
261
262	char ch;
263	while ((ch = *pszSrc) != '\0')
264	{
265	/* Spaces. */
266	while (ch == ' ')
267	{
268	*pszDst++ = ' ';
269	ch = *++pszSrc;
270	}
271	if (!ch)
272	break;
273
274	/* Find the end of the current word. */
275	size_t cchWord = 1;
276	while ((ch = pszSrc[cchWord]) != ' ' && ch != '\0')
277	cchWord++;
278
279	/* Check for g_aWord matches. */
280	size_t cchMax = pszSrcEnd - pszSrc;
281	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
282	{
283	size_t cchLen = g_aCompDict[i].str.length();
284	if ( cchLen >= cchWord
285	&& cchLen <= cchMax
286	&& g_aCompDict[i].str.compare(0, cchLen, pszSrc, cchLen) == 0)
287	{
288	*pszDst++ = (unsigned char)(0x80 \| i);
289	pszSrc += cchLen;
290	cchWord = 0;
291	break;
292	}
293	}
294
295	if (cchWord)
296	{
297	/* Copy the current word. */
298	ch = *pszSrc;
299	do
300	{
301	if (!((unsigned char)ch & 0x80))
302	{
303	*pszDst++ = ch;
304	pszSrc++;
305	}
306	else
307	{
308	RTUNICP uc;
309	int rc = RTStrGetCpEx(&pszSrc, &uc);
310	if (RT_SUCCESS(rc))
311	{
312	pszDst++ = (unsigned char)0xff; / escape single code point. */
313	pszDst = RTStrPutCp(pszDst, uc);
314	}
315	else
316	{
317	cerr << "Error: RTStrGetCpEx failed with rc=" << rc << endl;
318	exit(3);
319	}
320	}
321	} while ((ch = *pszSrc) != '\0' && ch != ' ');
322	}
323	}
324	*pszDst = '\0';
325	*pcchNew += pszDst - &szNew[0];
326
327	*pString = szNew;
328	}
329
330
331	/**
332	* Analyzes a string.
333	*
334	* @param pFrequencies The word frequency map.
335	* @param rString The string to analyze.
336	*/
337	static void AnalyzeString(WORDFREQMAP *pFrequencies, std::string const &rString)
338	{
339	const char *psz = rString.c_str();
340
341	/*
342	* For now we just consider words.
343	*/
344	char ch;
345	while ((ch = *psz) != '\0')
346	{
347	/* Skip leading spaces. */
348	while (ch == ' ')
349	ch = *++psz;
350	if (!ch)
351	return;
352
353	/* Find end of word. */
354	size_t cchWord = 1;
355	while ((ch = psz[cchWord]) != ' ' && ch != '\0')
356	cchWord++;
357	if (cchWord > 1)
358	{
359	std::string strWord(psz, cchWord);
360	WORDFREQMAP::iterator it = pFrequencies->find(strWord);
361	if (it != pFrequencies->end())
362	it->second += cchWord - 1;
363	else
364	(*pFrequencies)[strWord] = 0;
365	/** @todo could gain hits by including the space after the word, but that
366	* has the same accounting problems as the two words scenario below. */
367
368	# if 0 /** @todo need better accounting for overlapping alternatives before this can be enabled. */
369	/* Two words - immediate yields calc may lie when this enabled and we may pick the wrong words. */
370	if (ch == ' ')
371	{
372	ch = psz[++cchWord];
373	if (ch != ' ' && ch != '\0')
374	{
375	size_t const cchSaved = cchWord;
376
377	do
378	cchWord++;
379	while ((ch = psz[cchWord]) != ' ' && ch != '\0');
380
381	strWord = std::string(psz, cchWord);
382	WORDFREQMAP::iterator it = pFrequencies->find(strWord);
383	if (it != pFrequencies->end())
384	it->second += cchWord - 1;
385	else
386	(*pFrequencies)[strWord] = 0;
387
388	cchWord = cchSaved;
389	}
390	}
391	# endif
392	}
393
394	/* Advance. */
395	psz += cchWord;
396	}
397	}
398
399
400	/**
401	* Compresses the vendor and product strings.
402	*
403	* This is very very simple (a lot less work that the string table for
404	* instance).
405	*/
406	static void DoStringCompression(void)
407	{
408	/*
409	* Analyze the strings collecting stats on potential sequences to replace.
410	*/
411	WORDFREQMAP Frequencies;
412
413	uint32_t cProducts = 0;
414	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it, cProducts++)
415	AnalyzeString(&Frequencies, it->product.str);
416
417	uint32_t cVendors = 0;
418	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it, cVendors++)
419	AnalyzeString(&Frequencies, it->vendor.str);
420
421	if (g_fVerbose)
422	{
423	size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
424	size_t const cbVendors = cVendors * cbVendorEntry;
425	cout << INFO_PREF << cVendors << " vendors (" << cbVendors << " bytes)" << endl;
426
427	size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
428	size_t const cbProducts = cProducts * cbProductEntry;
429	cout << INFO_PREF << cProducts << " products (" << cbProducts << " bytes)" << endl;
430	}
431
432	/*
433	* Sort the result and pick the top 127 ones.
434	*/
435	std::vector<WordFreqSortEntry> SortMap;
436	for (WORDFREQMAP::iterator it = Frequencies.begin(); it != Frequencies.end(); ++it)
437	{
438	WORDFREQPAIR const &rPair = *it;
439	SortMap.push_back(WordFreqSortEntry(&rPair));
440	}
441
442	sort(SortMap.begin(), SortMap.end());
443
444	size_t cb = 0;
445	unsigned i = 0;
446	for (std::vector<WordFreqSortEntry>::iterator it = SortMap.begin();
447	it != SortMap.end() && i < RT_ELEMENTS(g_aCompDict);
448	++it, i++)
449	{
450	g_aCompDict[i].str = it->m_pPair->first;
451	cb += it->m_pPair->second;
452	}
453
454	if (g_fVerbose)
455	cout << INFO_PREF "Estimated compression saving " << cb << " bytes" << endl;
456
457	/*
458	* Rework the strings.
459	*/
460	size_t cchNew = 0;
461	size_t cchOld = 0;
462	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
463	FixupString(&it->product.str, &cchOld, &cchNew);
464	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
465	FixupString(&it->vendor.str, &cchOld, &cchNew);
466
467	for (i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
468	cchNew += g_aCompDict[i].str.length() + 1;
469
470	if (g_fVerbose)
471	{
472	cout << INFO_PREF "Strings: original: " << cchOld << " bytes; compressed: " << cchNew << " bytes;";
473	if (cchNew < cchOld)
474	cout << " saving " << (cchOld - cchNew) << " bytes (" << ((cchOld - cchNew) * 100 / cchOld) << "%)" << endl;
475	else
476	cout << " wasting " << (cchOld - cchNew) << " bytes!" << endl;
477	cout << INFO_PREF "Average string length is " << (cchOld / (cVendors + cProducts)) << endl;
478	}
479	}
480
481
482	/**
483	* Writes the compression dictionary to the output stream.
484	*
485	* @param rStrm The output stream.
486	*/
487	static void WriteCompressionDictionary(ostream &rStrm)
488	{
489	rStrm << "const USBIDDBSTR USBIdDatabase::s_aCompDict[" << dec << RT_ELEMENTS(g_aCompDict) << "] = " << endl;
490	rStrm << "{" << endl;
491	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
492	{
493	g_aCompDict[i].printRef(rStrm);
494	rStrm << " // " << g_aCompDict[i].str << endl;
495	}
496	rStrm << "};" << endl << endl;
497	}
498
499	#endif /* USB_ID_DATABASE_WITH_COMPRESSION */
500
501
502	/*
503	* Compile a string table.
504	*/
505
506	/** The size of g_papStrHash. */
507	static size_t g_cStrHash = 0;
508	/** String hash table. */
509	static PSTRTABSTRING *g_papStrHash = NULL;
510	/** Duplicate strings found by AddString. */
511	static size_t g_cDuplicateStrings = 0;
512	/** Total length of the unique strings (no terminators). */
513	static size_t g_cchUniqueStrings = 0;
514	/** Number of unique strings after AddString. */
515	static size_t g_cUniqueStrings = 0;
516	/** Number of collisions. */
517	static size_t g_cCollisions = 0;
518
519	/** Number of entries in g_apSortedStrings. */
520	static size_t g_cSortedStrings = 0;
521	/** The sorted string table. */
522	static PSTRTABSTRING *g_papSortedStrings = NULL;
523
524	/** The string table. */
525	static char *g_pachStrTab = NULL;
526	/** The actual string table size. */
527	static size_t g_cchStrTab = 0;
528
529
530	/**
531	* Adds a string to the hash table.
532	* @param pStr The string.
533	*/
534	static void AddString(PSTRTABSTRING pStr)
535	{
536	pStr->pNextRef = NULL;
537	pStr->pNextCollision = NULL;
538	pStr->StrRef.off = 0;
539	pStr->StrRef.cch = pStr->str.length();
540	size_t cchIgnored;
541	pStr->uHash = sdbm(pStr->str.c_str(), &cchIgnored);
542	Assert(cchIgnored == pStr->str.length());
543
544	size_t idxHash = pStr->uHash % g_cStrHash;
545	PSTRTABSTRING pCur = g_papStrHash[idxHash];
546	if (!pCur)
547	g_papStrHash[idxHash] = pStr;
548	else
549	{
550	/* Look for matching string. */
551	do
552	{
553	if ( pCur->uHash == pStr->uHash
554	&& pCur->StrRef.cch == pStr->StrRef.cch
555	&& pCur->str == pStr->str)
556	{
557	pStr->pNextRef = pCur->pNextRef;
558	pCur->pNextRef = pStr;
559	g_cDuplicateStrings++;
560	return;
561	}
562	pCur = pCur->pNextCollision;
563	} while (pCur != NULL);
564
565	/* No matching string, insert. */
566	g_cCollisions++;
567	pStr->pNextCollision = g_papStrHash[idxHash];
568	g_papStrHash[idxHash] = pStr;
569	}
570
571	g_cUniqueStrings++;
572	g_cchUniqueStrings += pStr->StrRef.cch;
573	}
574
575
576	/**
577	* Inserts a string into g_apUniqueStrings.
578	* @param pStr The string.
579	*/
580	static void InsertUniqueString(PSTRTABSTRING pStr)
581	{
582	size_t iIdx;
583	size_t iEnd = g_cSortedStrings;
584	if (iEnd)
585	{
586	size_t iStart = 0;
587	for (;;)
588	{
589	iIdx = iStart + (iEnd - iStart) / 2;
590	if (g_papSortedStrings[iIdx]->StrRef.cch < pStr->StrRef.cch)
591	{
592	if (iIdx <= iStart)
593	break;
594	iEnd = iIdx;
595	}
596	else if (g_papSortedStrings[iIdx]->StrRef.cch > pStr->StrRef.cch)
597	{
598	if (++iIdx >= iEnd)
599	break;
600	iStart = iIdx;
601	}
602	else
603	break;
604	}
605
606	if (iIdx != g_cSortedStrings)
607	memmove(&g_papSortedStrings[iIdx + 1], &g_papSortedStrings[iIdx],
608	(g_cSortedStrings - iIdx) * sizeof(g_papSortedStrings[iIdx]));
609	}
610	else
611	iIdx = 0;
612
613	g_papSortedStrings[iIdx] = pStr;
614	g_cSortedStrings++;
615	}
616
617
618	/**
619	* Creates a string table.
620	*
621	* This will save space by dropping string terminators, eliminating duplicates
622	* and try find strings that are sub-strings of others.
623	*
624	* Will initialize the StrRef of all StrTabString instances.
625	*/
626	static void CreateStringTable(void)
627	{
628	/*
629	* Allocate a hash table double the size of all strings (to avoid too
630	* many collisions). Add all strings to it, finding duplicates in the
631	* process.
632	*/
633	size_t cMaxStrings = g_products.size() + g_vendors.size();
634	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
635	cMaxStrings += RT_ELEMENTS(g_aCompDict);
636	#endif
637	cMaxStrings *= 2;
638	g_papStrHash = new PSTRTABSTRING[cMaxStrings];
639	g_cStrHash = cMaxStrings;
640	memset(g_papStrHash, 0, cMaxStrings * sizeof(g_papStrHash[0]));
641
642	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
643	AddString(&it->product);
644	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
645	AddString(&it->vendor);
646	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
647	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
648	AddString(&g_aCompDict[i]);
649	#endif
650	if (g_fVerbose)
651	cout << INFO_PREF "" << g_cUniqueStrings << " unique string (" << g_cchUniqueStrings << " bytes), "
652	<< g_cDuplicateStrings << " duplicates, " << g_cCollisions << " collisions" << endl;
653
654	/*
655	* Create g_papSortedStrings from the hash table. The table is sorted by
656	* string length, with the longer strings first.
657	*/
658	g_papSortedStrings = new PSTRTABSTRING[g_cUniqueStrings];
659	g_cSortedStrings = 0;
660	size_t idxHash = g_cStrHash;
661	while (idxHash-- > 0)
662	{
663	PSTRTABSTRING pCur = g_papStrHash[idxHash];
664	if (pCur)
665	{
666	do
667	{
668	InsertUniqueString(pCur);
669	pCur = pCur->pNextCollision;
670	} while (pCur);
671	}
672	}
673
674	/*
675	* Create the actual string table.
676	*/
677	g_pachStrTab = new char [g_cchUniqueStrings + 1];
678	g_cchStrTab = 0;
679	for (size_t i = 0; i < g_cSortedStrings; i++)
680	{
681	PSTRTABSTRING pCur = g_papSortedStrings[i];
682	const char * const pszCur = pCur->str.c_str();
683	size_t const cchCur = pCur->StrRef.cch;
684	size_t offStrTab = g_cchStrTab;
685
686	/*
687	* See if the string is a substring already found in the string table.
688	* Excluding the zero terminator increases the chances for this.
689	*/
690	size_t cchLeft = g_cchStrTab >= cchCur ? g_cchStrTab - cchCur : 0;
691	const char *pchLeft = g_pachStrTab;
692	char const chFirst = *pszCur;
693	while (cchLeft > 0)
694	{
695	const char pchCandidate = (const char )memchr(pchLeft, chFirst, cchLeft);
696	if (!pchCandidate)
697	break;
698	if (memcmp(pchCandidate, pszCur, cchCur) == 0)
699	{
700	offStrTab = pchCandidate - g_pachStrTab;
701	break;
702	}
703
704	cchLeft -= pchCandidate + 1 - pchLeft;
705	pchLeft = pchCandidate + 1;
706	}
707
708	if (offStrTab == g_cchStrTab)
709	{
710	/*
711	* See if the start of the string overlaps the end of the string table.
712	*/
713	if (g_cchStrTab && cchCur > 1)
714	{
715	cchLeft = RT_MIN(g_cchStrTab, cchCur - 1);
716	pchLeft = &g_pachStrTab[g_cchStrTab - cchLeft];
717	while (cchLeft > 0)
718	{
719	const char pchCandidate = (const char )memchr(pchLeft, chFirst, cchLeft);
720	if (!pchCandidate)
721	break;
722	cchLeft -= pchCandidate - pchLeft;
723	pchLeft = pchCandidate;
724	if (memcmp(pchLeft, pszCur, cchLeft) == 0)
725	{
726	size_t cchToCopy = cchCur - cchLeft;
727	memcpy(&g_pachStrTab[offStrTab], &pszCur[cchLeft], cchToCopy);
728	g_cchStrTab += cchToCopy;
729	offStrTab = pchCandidate - g_pachStrTab;
730	break;
731	}
732	cchLeft--;
733	pchLeft++;
734	}
735	}
736
737	/*
738	* If we didn't have any luck above, just append the string.
739	*/
740	if (offStrTab == g_cchStrTab)
741	{
742	memcpy(&g_pachStrTab[offStrTab], pszCur, cchCur);
743	g_cchStrTab += cchCur;
744	}
745	}
746
747	/*
748	* Set the string table offset for all the references to this string.
749	*/
750	do
751	{
752	pCur->StrRef.off = (uint32_t)offStrTab;
753	pCur = pCur->pNextRef;
754	} while (pCur != NULL);
755	}
756
757	if (g_fVerbose)
758	cout << INFO_PREF "String table: " << g_cchStrTab << " bytes" << endl;
759	}
760
761
762	#ifdef VBOX_STRICT
763	/**
764	* Sanity checks a string table string.
765	* @param pStr The string to check.
766	*/
767	static void CheckStrTabString(PSTRTABSTRING pStr)
768	{
769	Assert(pStr->StrRef.cch == pStr->str.length());
770	Assert(pStr->StrRef.off < g_cchStrTab);
771	Assert(pStr->StrRef.off + pStr->StrRef.cch <= g_cchStrTab);
772	Assert(memcmp(pStr->str.c_str(), &g_pachStrTab[pStr->StrRef.off], pStr->str.length()) == 0);
773	}
774	#endif
775
776
777	/**
778	* Writes the string table code to the output stream.
779	*
780	* @param rStrm The output stream.
781	*/
782	static void WriteStringTable(ostream &rStrm)
783	{
784	#ifdef VBOX_STRICT
785	/*
786	* Do some quick sanity checks while we're here.
787	*/
788	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
789	CheckStrTabString(&it->product);
790	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
791	CheckStrTabString(&it->vendor);
792	# ifdef USB_ID_DATABASE_WITH_COMPRESSION
793	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
794	CheckStrTabString(&g_aCompDict[i]);
795	# endif
796	#endif
797
798	/*
799	* Create a table for speeding up the character categorization.
800	*/
801	uint8_t abCharCat[256];
802	RT_ZERO(abCharCat);
803	abCharCat[(unsigned char)'\\'] = 1;
804	abCharCat[(unsigned char)'\''] = 1;
805	for (unsigned i = 0; i < 0x20; i++)
806	abCharCat[i] = 2;
807	for (unsigned i = 0x7f; i < 0x100; i++)
808	abCharCat[i] = 2;
809
810	/*
811	* We follow the sorted string table, one string per line.
812	*/
813	rStrm << endl;
814	rStrm << "const size_t USBIdDatabase::s_cchStrTab = " << g_cchStrTab << ";" << endl;
815	rStrm << "const char USBIdDatabase::s_achStrTab[] =" << endl;
816	rStrm << "{" << endl;
817
818	uint32_t off = 0;
819	for (uint32_t i = 0; i < g_cSortedStrings; i++)
820	{
821	PSTRTABSTRING pCur = g_papSortedStrings[i];
822	uint32_t offEnd = pCur->StrRef.off + pCur->StrRef.cch;
823	if (offEnd > off)
824	{
825	/* Comment with a more readable version of the string. */
826	if (off == pCur->StrRef.off)
827	rStrm << " /* 0x";
828	else
829	rStrm << " /* 0X";
830	rStrm << hex << setfill('0') << setw(5) << off << " = \"";
831	for (uint32_t offTmp = off; offTmp < offEnd; offTmp++)
832	{
833	unsigned char uch = g_pachStrTab[offTmp];
834	if (abCharCat[uch] == 0)
835	rStrm << (char)uch;
836	else if (abCharCat[uch] != 1)
837	rStrm << "\\x" << setw(2) << hex << (size_t)uch;
838	else
839	rStrm << "\\" << (char)uch;
840	}
841	rStrm << "\" */" << endl;
842
843	/* Must use char by char here or we may trigger the max string
844	length limit in the compiler, */
845	rStrm << " ";
846	for (; off < offEnd; off++)
847	{
848	unsigned char uch = g_pachStrTab[off];
849	rStrm << "'";
850	if (abCharCat[uch] == 0)
851	rStrm << (char)uch;
852	else if (abCharCat[uch] != 1)
853	rStrm << "\\x" << setw(2) << hex << (size_t)uch;
854	else
855	rStrm << "\\" << (char)uch;
856	rStrm << "',";
857	}
858	rStrm << endl;
859	}
860	}
861
862	rStrm << "};" << endl;
863	rStrm << "AssertCompile(sizeof(USBIdDatabase::s_achStrTab) == 0x" << hex << g_cchStrTab << ");" << endl << endl;
864	}
865
866
867	/*
868	* Input file parsing.
869	*/
870
871	/** The size of all the raw strings, including terminators. */
872	static size_t g_cbRawStrings = 0;
873
874	int ParseAlias(const string& src, size_t& id, string& desc)
875	{
876	unsigned int i = 0;
877	if (sscanf(src.c_str(), "%x", &i) != 1)
878	return ERROR_IN_PARSE_LINE;
879
880	/* skip the number and following whitespace. */
881	size_t offNext = src.find_first_of(" \t", 1);
882	offNext = src.find_first_not_of(" \t", offNext);
883	if (offNext != string::npos)
884	{
885	size_t cchLength = src.length() - offNext;
886	if (cchLength <= USB_ID_DATABASE_MAX_STRING)
887	{
888	id = i;
889	desc = src.substr(offNext);
890
891	/* Check the string encoding. */
892	int rc = RTStrValidateEncoding(desc.c_str());
893	if (RT_SUCCESS(rc))
894	{
895	g_cbRawStrings += desc.length() + 1;
896	return RTEXITCODE_SUCCESS;
897	}
898
899	cerr << "Error: Invalid encoding: '" << desc << "' (rc=" << rc << ")" << endl;
900	}
901	cerr << "Error: String to long (" << cchLength << ")" << endl;
902	}
903	else
904	cerr << "Error: Error parsing \"" << src << "\"" << endl;
905	return ERROR_IN_PARSE_LINE;
906	}
907
908	bool IsCommentOrEmptyLine(const string& str)
909	{
910	size_t index = str.find_first_not_of(" \t");// skip left spaces
911	return index == string::npos \|\| str[index] == '#';
912	}
913
914	bool getline(PRTSTREAM instream, string& resString)
915	{
916	const size_t szBuf = 4096;
917	char buf[szBuf] = { 0 };
918
919	int rc = RTStrmGetLine(instream, buf, szBuf);
920	if (RT_SUCCESS(rc))
921	{
922	resString = buf;
923	return true;
924	}
925	else if (rc != VERR_EOF)
926	{
927	cerr << "Warning: Invalid line in file. Error: " << rc << endl;
928	}
929	return false;
930	}
931
932	int ParseUsbIds(PRTSTREAM instream)
933	{
934	State::Value state = State::lookForStartBlock;
935	string line;
936	int res = 0;
937	VendorRecord vendor = { 0, 0, 0, "" };
938
939	while (state != State::finished && getline(instream, line))
940	{
941	switch (state)
942	{
943	case State::lookForStartBlock:
944	{
945	if (line.find(start_block) != string::npos)
946	state = State::lookForEndBlock;
947	break;
948	}
949	case State::lookForEndBlock:
950	{
951	if (line.find(end_block) != string::npos)
952	state = State::finished;
953	else
954	{
955	if (!IsCommentOrEmptyLine(line))
956	{
957	if (line[0] == '\t')
958	{
959	// Parse Product line
960	// first line should be vendor
961	if (vendor.vendorID == 0)
962	{
963	cerr << "Wrong file format. Product before vendor: " << line.c_str() << "'" << endl;
964	return ERROR_WRONG_FILE_FORMAT;
965	}
966	ProductRecord product = { 0, vendor.vendorID, 0, "" };
967	if (ParseAlias(line.substr(1), product.productID, product.product.str) != 0)
968	{
969	cerr << "Error in parsing product line: '" << line.c_str() << "'" << endl;
970	return ERROR_IN_PARSE_LINE;
971	}
972	product.key = RT_MAKE_U32(product.productID, product.vendorID);
973	Assert(product.vendorID == vendor.vendorID);
974	g_products.push_back(product);
975	}
976	else
977	{
978	// Parse vendor line
979	if (ParseAlias(line, vendor.vendorID, vendor.vendor.str) != 0)
980	{
981	cerr << "Error in parsing vendor line: '"
982	<< line.c_str() << "'" << endl;
983	return ERROR_IN_PARSE_LINE;
984	}
985	g_vendors.push_back(vendor);
986	}
987	}
988	}
989	break;
990	}
991	}
992	}
993	if (state == State::lookForStartBlock)
994	{
995	cerr << "Error: wrong format of input file. Start line is not found." << endl;
996	return ERROR_WRONG_FILE_FORMAT;
997	}
998	return 0;
999	}
1000
1001
1002	static int usage(ostream &rOut, const char *argv0)
1003	{
1004	rOut << "Usage: " << argv0
1005	<< " [linux.org usb list file] [custom usb list file] [-o output file]" << endl;
1006	return RTEXITCODE_SYNTAX;
1007	}
1008
1009	int main(int argc, char *argv[])
1010	{
1011	/*
1012	* Initialize IPRT and convert argv to UTF-8.
1013	*/
1014	int rc = RTR3InitExe(argc, &argv, 0);
1015	if (RT_FAILURE(rc))
1016	return RTMsgInitFailure(rc);
1017
1018	/*
1019	* Parse arguments and read input files.
1020	*/
1021	if (argc < 4)
1022	{
1023	usage(cerr, argv[0]);
1024	cerr << "Error: Not enough arguments." << endl;
1025	return RTEXITCODE_SYNTAX;
1026	}
1027	ofstream fout;
1028	PRTSTREAM fin;
1029	g_products.reserve(20000);
1030	g_vendors.reserve(3500);
1031
1032	const char *outName = NULL;
1033	for (int i = 1; i < argc; i++)
1034	{
1035	if (strcmp(argv[i], "-o") == 0)
1036	{
1037	outName = argv[++i];
1038	continue;
1039	}
1040	if ( strcmp(argv[i], "-h") == 0
1041	\|\| strcmp(argv[i], "-?") == 0
1042	\|\| strcmp(argv[i], "--help") == 0)
1043	{
1044	usage(cout, argv[0]);
1045	return RTEXITCODE_SUCCESS;
1046	}
1047
1048	rc = RTStrmOpen(argv[i], "r", &fin);
1049	if (RT_FAILURE(rc))
1050	{
1051	cerr << "Error: Failed to open file '" << argv[i] << "' for reading. rc=" << rc << endl;
1052	return ERROR_OPEN_FILE;
1053	}
1054
1055	rc = ParseUsbIds(fin);
1056	if (rc != 0)
1057	{
1058	cerr << "Error: Failed parsing USB devices file '" << argv[i] << "'" << endl;
1059	RTStrmClose(fin);
1060	return rc;
1061	}
1062	RTStrmClose(fin);
1063	}
1064
1065	/*
1066	* Due to USBIDDBVENDOR::iProduct, there is currently a max of 64KB products.
1067	* (Not a problem as we've only have less that 54K products currently.)
1068	*/
1069	if (g_products.size() > _64K)
1070	{
1071	cerr << "Error: More than 64K products is not supported (input: " << g_products.size() << ")" << endl;
1072	return ERROR_TOO_MANY_PRODUCTS;
1073	}
1074
1075	/*
1076	* Sort the IDs and fill in the iProduct and cProduct members.
1077	*/
1078	sort(g_products.begin(), g_products.end());
1079	sort(g_vendors.begin(), g_vendors.end());
1080
1081	size_t iProduct = 0;
1082	for (size_t iVendor = 0; iVendor < g_vendors.size(); iVendor++)
1083	{
1084	size_t const idVendor = g_vendors[iVendor].vendorID;
1085	g_vendors[iVendor].iProduct = iProduct;
1086	if ( iProduct < g_products.size()
1087	&& g_products[iProduct].vendorID <= idVendor)
1088	{
1089	if (g_products[iProduct].vendorID == idVendor)
1090	do
1091	iProduct++;
1092	while ( iProduct < g_products.size()
1093	&& g_products[iProduct].vendorID == idVendor);
1094	else
1095	{
1096	cerr << "Error: product without vendor after sorting. impossible!" << endl;
1097	return ERROR_IN_PARSE_LINE;
1098	}
1099	}
1100	g_vendors[iVendor].cProducts = iProduct - g_vendors[iVendor].iProduct;
1101	}
1102
1103	/*
1104	* Verify that all IDs are unique.
1105	*/
1106	ProductsSet::iterator ita = adjacent_find(g_products.begin(), g_products.end());
1107	if (ita != g_products.end())
1108	{
1109	cerr << "Error: Duplicate alias detected. " << *ita << endl;
1110	return ERROR_DUPLICATE_ENTRY;
1111	}
1112
1113	/*
1114	* Do string compression and create the string table.
1115	*/
1116	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1117	DoStringCompression();
1118	#endif
1119	CreateStringTable();
1120
1121	/*
1122	* Print stats.
1123	*/
1124	size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
1125	size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
1126
1127	size_t cbOldRaw = (g_products.size() + g_vendors.size()) * sizeof(const char ) 2 + g_cbRawStrings;
1128	size_t cbRaw = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cbRawStrings;
1129	size_t cbActual = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cchStrTab;
1130	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1131	cbActual += sizeof(USBIdDatabase::s_aCompDict);
1132	#endif
1133	cout << INFO_PREF "Total " << dec << cbActual << " bytes";
1134	if (cbActual < cbRaw)
1135	cout << " saving " << dec << ((cbRaw - cbActual) * 100 / cbRaw) << "% (" << (cbRaw - cbActual) << " bytes)";
1136	else
1137	cout << " wasting " << dec << (cbActual - cbRaw) << " bytes";
1138	cout << "; old version " << cbOldRaw << " bytes + relocs ("
1139	<< ((cbOldRaw - cbActual) * 100 / cbOldRaw) << "% save)." << endl;
1140
1141
1142	/*
1143	* Produce the source file.
1144	*/
1145	if (!outName)
1146	{
1147	cerr << "Error: Output file is not specified." << endl;
1148	return ERROR_OPEN_FILE;
1149	}
1150
1151	fout.open(outName);
1152	if (!fout.is_open())
1153	{
1154	cerr << "Error: Can not open file to write '" << outName << "'." << endl;
1155	return ERROR_OPEN_FILE;
1156	}
1157
1158	fout << header;
1159
1160	WriteStringTable(fout);
1161	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1162	WriteCompressionDictionary(fout);
1163	#endif
1164
1165	fout << product_header;
1166	for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1167	fout << *itp;
1168	fout << product_part2;
1169	for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1170	itp->product.printRefLine(fout);
1171	fout << product_footer;
1172
1173	fout << vendor_header;
1174	for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1175	fout << *itv;
1176	fout << vendor_part2;
1177	for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1178	itv->vendor.printRefLine(fout);
1179	fout << vendor_footer;
1180
1181	fout.close();
1182
1183
1184	return RTEXITCODE_SUCCESS;
1185	}
1186

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Main/src-server/USBIdDatabaseGenerator.cpp@ 59381

Download in other formats: