VirtualBox

source: vbox/trunk/src/VBox/Runtime/uniread.cpp@ 3282

Last change on this file since 3282 was 2981, checked in by vboxsync, 18 years ago

InnoTek -> innotek: all the headers and comments.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 26.0 KB
Line 
1/* $Id: uniread.cpp 2981 2007-06-01 16:01:28Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <iprt/types.h>
26#include <iprt/stdarg.h>
27
28#include <stdio.h>
29#include <string.h>
30#include <stdlib.h>
31
32
/**
 * Strips a line in place: leading blanks, a trailing '#' comment and
 * trailing whitespace are removed.
 *
 * @returns Pointer to the first non-blank character of the line.
 * @param   pszLine     The line to strip.  The buffer is modified.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading blanks (spaces and tabs only). */
    char *pszStart = pszLine;
    for (; *pszStart == ' ' || *pszStart == '\t'; pszStart++)
        /* nothing */;

    /* Cut the line at a comment marker, otherwise locate the terminator. */
    char *pszEnd = strchr(pszStart, '#');
    if (pszEnd != NULL)
        *pszEnd = '\0';
    else
        pszEnd = pszStart + strlen(pszStart);

    /* Walk backwards over trailing whitespace, terminating as we go. */
    while (   pszEnd > pszStart
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszStart;
}
64
65
/**
 * Checks whether a line contains nothing but whitespace and/or a comment
 * and should therefore be skipped by the parsers.
 *
 * @returns true if the line is blank or starts (after whitespace) with '#',
 *          false otherwise.
 * @param   pszLine     The line to examine.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    /* The first character that is not whitespace decides the answer. */
    for (;; pszLine++)
    {
        switch (*pszLine)
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                continue;

            case '#':
            case '\0':
                return true;

            default:
                return false;
        }
    }
}
77
78
/**
 * Extracts the first ';'-separated field of a string, in place.
 *
 * The separator (if any) is overwritten with a terminator and the field is
 * stripped of leading and trailing whitespace.
 *
 * @returns Pointer to the stripped field (possibly an empty string).
 * @param   ppsz        Where to store the position at which the next field
 *                      starts.  When no separator is found this points at
 *                      the string terminator.
 * @param   pszLine     The string to split.  May be the *ppsz value from a
 *                      previous FirstField/NextField call.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Find where this field ends and tell the caller where to continue. */
    char *pszEnd = strchr(pszLine, ';');
    if (pszEnd != NULL)
    {
        *pszEnd = '\0';
        *ppsz = pszEnd + 1;
    }
    else
    {
        pszEnd = pszLine + strlen(pszLine);
        *ppsz = pszEnd;
    }

    /* Strip leading whitespace. */
    char *pszField = pszLine;
    while (   *pszField == ' '
           || *pszField == '\t'
           || *pszField == '\r'
           || *pszField == '\n')
        pszField++;

    /* Strip trailing whitespace. */
    while (   pszEnd > pszField
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszField;
}
115
116
117/**
118 * Get the next field in a field enumeration.
119 *
120 * @returns Pointer to the next field.
121 * @param ppsz Where to get and store the string postition.
122 */
123static char *NextField(char **ppsz)
124{
125 return FirstField(ppsz, *ppsz);
126}
127
128
129/**
130 * Converts a code point field to a number.
131 * @returns Code point.
132 * @param psz The field string.
133 */
134static RTUNICP ToNum(const char *psz)
135{
136 char *pszEnd = NULL;
137 unsigned long ul = strtoul(psz, &pszEnd, 16);
138 if (pszEnd && *pszEnd)
139 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
140 return (RTUNICP)ul;
141}
142
143
144/**
145 * Same as ToNum except that if the field is empty the Default is returned.
146 */
147static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
148{
149 if (*psz)
150 return ToNum(psz);
151 return Default;
152}
153
154
155/**
156 * Converts a code point range to numbers.
157 * @returns The start code point.\
158 * @returns ~(RTUNICP)0 on failure.
159 * @param psz The field string.
160 * @param pLast Where to store the last code point in the range.
161 */
162static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
163{
164 char *pszEnd = NULL;
165 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
166 unsigned long ulLast = ulStart;
167 if (pszEnd && *pszEnd)
168 {
169 if (*pszEnd == '.')
170 {
171 while (*pszEnd == '.')
172 pszEnd++;
173 ulLast = strtoul(pszEnd, &pszEnd, 16);
174 if (pszEnd && *pszEnd)
175 {
176 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
177 return ~(RTUNICP)0;
178 }
179 }
180 else
181 {
182 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
183 return ~(RTUNICP)0;
184 }
185 }
186 *pLast = (RTUNICP)ulLast;
187 return (RTUNICP)ulStart;
188
189}
190
191
/**
 * Duplicates a string, optimizing empty strings to save memory.
 *
 * Empty input is not allocated; a pointer to a shared static empty string
 * is returned instead, so the result must not be freed or modified without
 * checking for that case.  The process is terminated with exit(1) when out
 * of memory, so the function never returns NULL.
 *
 * @returns Pointer to the string copy.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    /* A writable array rather than a literal: a literal cannot legally be
       returned through a non-const 'char *' in C++11 and later. */
    static char s_szEmpty[] = "";
    if (!*pszStr)
        return s_szEmpty;

    /* malloc + memcpy instead of strdup, which is not ISO C before C23. */
    size_t cb  = strlen(pszStr) + 1;
    char  *psz = (char *)malloc(cb);
    if (psz)
    {
        memcpy(psz, pszStr, cb);
        return psz;
    }

    fprintf(stderr, "out of memory!\n");
    exit(1);
}
209
210
211/**
212 * Array of all possible and impossible unicode code points as of 4.1
213 */
214struct CPINFO
215{
216 RTUNICP CodePoint;
217 RTUNICP SimpleUpperCaseMapping;
218 RTUNICP SimpleLowerCaseMapping;
219 RTUNICP SimpleTitleCaseMapping;
220 char *pszName;
221 /** Set if this is an unused entry */
222 unsigned fNullEntry : 1;
223
224 unsigned fAlphabetic : 1;
225 unsigned fASCIIHexDigit : 1;
226 unsigned fBidiControl : 1;
227 unsigned fDash : 1;
228 unsigned fDefaultIgnorableCodePoint : 1;
229 unsigned fDeprecated : 1;
230 unsigned fDiacritic : 1;
231 unsigned fExtender : 1;
232 unsigned fGraphemeBase : 1;
233 unsigned fGraphemeExtend : 1;
234 unsigned fGraphemeLink : 1;
235 unsigned fHexDigit : 1;
236 unsigned fHyphen : 1;
237 unsigned fIDContinue : 1;
238 unsigned fIdeographic : 1;
239 unsigned fIDSBinaryOperator : 1;
240 unsigned fIDStart : 1;
241 unsigned fIDSTrinaryOperator : 1;
242 unsigned fJoinControl : 1;
243 unsigned fLogicalOrderException : 1;
244 unsigned fLowercase : 1;
245 unsigned fMath : 1;
246 unsigned fNoncharacterCodePoint : 1;
247 unsigned fOtherAlphabetic : 1;
248 unsigned fOtherDefaultIgnorableCodePoint : 1;
249 unsigned fOtherGraphemeExtend : 1;
250 unsigned fOtherIDContinue : 1;
251 unsigned fOtherIDStart : 1;
252 unsigned fOtherLowercase : 1;
253 unsigned fOtherMath : 1;
254 unsigned fOtherUppercase : 1;
255 unsigned fPatternSyntax : 1;
256 unsigned fPatternWhiteSpace : 1;
257 unsigned fQuotationMark : 1;
258 unsigned fRadical : 1;
259 unsigned fSoftDotted : 1;
260 unsigned fSTerm : 1;
261 unsigned fTerminalPunctuation : 1;
262 unsigned fUnifiedIdeograph : 1;
263 unsigned fUppercase : 1;
264 unsigned fVariationSelector : 1;
265 unsigned fWhiteSpace : 1;
266 unsigned fXIDContinue : 1;
267 unsigned fXIDStart : 1;
268
269 /* unprocess stuff, so far. */
270 char *pszGeneralCategory;
271 char *pszCanonicalCombiningClass;
272 char *pszBidiClass;
273 char *pszDecompositionType;
274 char *pszDecompositionMapping;
275 char *pszNumericType;
276 char *pszNumericValue;
277 char *pszBidiMirrored;
278 char *pszUnicode1Name;
279 char *pszISOComment;
280} g_aCPInfo[0xf0000];
281
282
283/**
284 * Creates a 'null' entry at i.
285 * @param i The entry in question.
286 */
287static void NullEntry(unsigned i)
288{
289 g_aCPInfo[i].CodePoint = i;
290 g_aCPInfo[i].fNullEntry = 1;
291 g_aCPInfo[i].pszName = "";
292 g_aCPInfo[i].SimpleUpperCaseMapping = i;
293 g_aCPInfo[i].SimpleLowerCaseMapping = i;
294 g_aCPInfo[i].SimpleTitleCaseMapping = i;
295 g_aCPInfo[i].pszGeneralCategory = "";
296 g_aCPInfo[i].pszCanonicalCombiningClass = "";
297 g_aCPInfo[i].pszBidiClass = "";
298 g_aCPInfo[i].pszDecompositionType = "";
299 g_aCPInfo[i].pszDecompositionMapping = "";
300 g_aCPInfo[i].pszNumericType = "";
301 g_aCPInfo[i].pszNumericValue = "";
302 g_aCPInfo[i].pszBidiMirrored = "";
303 g_aCPInfo[i].pszUnicode1Name = "";
304 g_aCPInfo[i].pszISOComment = "";
305}
306
307
308/**
309 * Read the UnicodeData.txt file.
310 * @returns 0 on success.
311 * @returns !0 on failure.
312 * @param pszFilename The name of the file.
313 */
314static int ReadUnicodeData(const char *pszFilename)
315{
316 /*
317 * Open input.
318 */
319 FILE *pFile = fopen(pszFilename, "r");
320 if (!pFile)
321 {
322 printf("uniread: failed to open '%s' for reading\n", pszFilename);
323 return 1;
324 }
325
326 /*
327 * Parse the input and spit out the output.
328 */
329 char szLine[4096];
330 RTUNICP i = 0;
331 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
332 {
333 if (IsCommentOrBlankLine(szLine))
334 continue;
335
336 char *pszCurField;
337 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
338 char *pszName = NextField(&pszCurField); /* 1 */
339 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
340 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
341 char *pszBidiClass = NextField(&pszCurField); /* 4 */
342 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
343 char *pszDecompositionMapping = NextField(&pszCurField); /* 6 */
344 char *pszNumericType = NextField(&pszCurField); /* 7 */
345 char *pszNumericValue = NextField(&pszCurField); /* 8 */
346 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
347 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
348 char *pszISOComment = NextField(&pszCurField); /* 11 */
349 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
350 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
351 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
352
353 RTUNICP CodePoint = ToNum(pszCodePoint);
354 if (CodePoint >= ELEMENTS(g_aCPInfo))
355 continue;
356
357 /* catchup? */
358 while (i < CodePoint)
359 NullEntry(i++);
360 if (i != CodePoint)
361 {
362 fprintf(stderr, "unitest: error: i=%d CodePoint=%u\n", i, CodePoint);
363 fclose(pFile);
364 return 1;
365 }
366
367 /* this one */
368 g_aCPInfo[i].CodePoint = i;
369 g_aCPInfo[i].fNullEntry = 0;
370 g_aCPInfo[i].pszName = DupStr(pszName);
371 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
372 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
373 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
374 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
375 g_aCPInfo[i].pszCanonicalCombiningClass = DupStr(pszCanonicalCombiningClass);
376 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
377 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
378 g_aCPInfo[i].pszDecompositionMapping = DupStr(pszDecompositionMapping);
379 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
380 g_aCPInfo[i].pszNumericValue = DupStr(pszNumericValue);
381 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
382 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
383 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
384 i++;
385 }
386 /* catchup? */
387 while (i < ELEMENTS(g_aCPInfo))
388 NullEntry(i++);
389 fclose(pFile);
390
391 return 0;
392}
393
394
395/**
396 * Applies a property to a code point.
397 *
398 * @param StartCP The code point.
399 * @param pszProperty The property name.
400 */
401static void ApplyProperty(RTUNICP StartCP, const char *pszProperty)
402{
403 if (StartCP >= ELEMENTS(g_aCPInfo))
404 return;
405 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
406 /* string switch */
407 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
408 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
409 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
410 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
411 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
412 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
413 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
414 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
415 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
416 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
417 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
418 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
419 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
420 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
421 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
422 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
423 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
424 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
425 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
426 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
427 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
428 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
429 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
430 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
431 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
432 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
433 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
434 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
435 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
436 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
437 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
438 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
439 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
440 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
441 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
442 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
443 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
444 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
445 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
446 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
447 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
448 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
449 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
450 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
451 else
452 fprintf(stderr, "uniread: Unknown property '%s'\n", pszProperty);
453}
454
455
456/**
457 * Reads a property file.
458 *
459 * There are several property files, this code can read all
460 * of those but will only make use of the properties it recognizes.
461 *
462 * @returns 0 on success.
463 * @returns !0 on failure.
464 * @param pszFilename The name of the file.
465 */
466static int ReadProperties(const char *pszFilename)
467{
468 /*
469 * Open input.
470 */
471 FILE *pFile = fopen(pszFilename, "r");
472 if (!pFile)
473 {
474 printf("uniread: failed to open '%s' for reading\n", pszFilename);
475 return 1;
476 }
477
478 /*
479 * Parse the input and spit out the output.
480 */
481 char szLine[4096];
482 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
483 {
484 if (IsCommentOrBlankLine(szLine))
485 continue;
486 char *pszCurField;
487 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
488 char *pszProperty = NextField(&pszCurField);
489 if (!*pszProperty)
490 continue;
491
492 RTUNICP LastCP;
493 RTUNICP StartCP = ToRange(pszRange, &LastCP);
494 if (StartCP == ~(RTUNICP)0)
495 continue;
496
497 while (StartCP <= LastCP)
498 ApplyProperty(StartCP++, pszProperty);
499 }
500
501 fclose(pFile);
502
503 return 0;
504}
505
506
/**
 * Appends a flag name to a flag expression string, inserting " | " in
 * front of it unless the string is still empty.
 *
 * No bounds checking is done; the caller's buffer must be large enough.
 *
 * @returns psz.
 * @param   psz         The flag expression buffer to append to.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    size_t off = strlen(psz);
    if (off != 0)
    {
        /* Not the first flag, OR it with the previous ones. */
        strcpy(&psz[off], " | ");
        off += 3;
    }
    strcpy(&psz[off], pszFlag);
    return psz;
}
522
523/**
524 * Calcs the flags for a code point.
525 * @returns true if there is a flag.
526 * @returns false if the isn't.
527 */
528static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
529{
530 pszFlags[0] = '\0';
531 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
532 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
533 AppendFlag(pszFlags, "RTUNI_ALPHA");
534 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
535 AppendFlag(pszFlags, "RTUNI_XDIGIT");
536 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
537 AppendFlag(pszFlags, "RTUNI_DDIGIT");
538 if (pInfo->fWhiteSpace)
539 AppendFlag(pszFlags, "RTUNI_WSPACE");
540 if (pInfo->fUppercase || pInfo->fOtherUppercase)
541 AppendFlag(pszFlags, "RTUNI_UPPER");
542 if (pInfo->fLowercase || pInfo->fOtherLowercase)
543 AppendFlag(pszFlags, "RTUNI_LOWER");
544 //if (pInfo->fNumeric)
545 // AppendFlag(pszFlags, "RTUNI_NUMERIC");
546 if (!*pszFlags)
547 {
548 pszFlags[0] = '0';
549 pszFlags[1] = '\0';
550 return false;
551 }
552 return true;
553}
554
/** The data store for stream two.  Output is collected here while other
 * output goes directly to stdout, and is written out by Stream2Flush. */
static char g_szStream2[10240];
/** Number of characters currently stored in g_szStream2 (excluding the
 * terminator).  Always less than sizeof(g_szStream2). */
static unsigned g_offStream2 = 0;

/**
 * Initializes (empties) the 2nd stream.
 */
static void Stream2Init(void)
{
    g_szStream2[0] = '\0';
    g_offStream2 = 0;
}

/**
 * Flushes the 2nd stream to stdout.
 *
 * The buffer is not emptied; call Stream2Init before reusing it.
 *
 * @returns 0 on success.
 * @returns !0 if not everything could be written.
 */
static int Stream2Flush(void)
{
    size_t cbWritten = fwrite(g_szStream2, 1, g_offStream2, stdout);
    if (cbWritten != g_offStream2)
    {
        fprintf(stderr, "error: failed writing stream2 to stdout!\n");
        return 1;
    }
    return 0;
}

/**
 * printf to the 2nd stream.
 *
 * The output is formatted with a bounded vsnprintf, so a too long string
 * can no longer be written past the end of the buffer before the overflow
 * is detected.  On overflow or formatting failure the process is
 * terminated with exit(1).
 *
 * @returns Number of characters appended.
 * @param   pszFormat   The format string.
 * @param   ...         Format arguments.
 */
static int Stream2Printf(const char *pszFormat, ...)
{
    /* Space left, including room for the terminator (at least 1). */
    const size_t cbLeft = sizeof(g_szStream2) - g_offStream2;

    va_list va;
    va_start(va, pszFormat);
    int cch = vsnprintf(&g_szStream2[g_offStream2], cbLeft, pszFormat, va);
    va_end(va);

    if (cch < 0)
    {
        fprintf(stderr, "error: stream2 formatting failed!\n");
        exit(1);
    }
    /* vsnprintf returns the length the full output would have had. */
    if ((size_t)cch >= cbLeft)
    {
        fprintf(stderr, "error: stream2 overflow!\n");
        exit(1);
    }

    g_offStream2 += (unsigned)cch;
    return cch;
}
594
595
/**
 * Prints the file header of the generated unidata.cpp to stdout: the
 * doxygen file comment, the license text and the include list.
 *
 * @returns 0.
 * @param   argv0       The program name to mention in the header.
 */
int PrintHeader(const char *argv0)
{
    /* The file comment, the only part containing variable text. */
    printf("/** @file\n"
           " *\n"
           " * innotek Portable Runtime - Unicode Tables\n"
           " *\n"
           " * Automatically Generated by %s (" __DATE__ " " __TIME__ ")\n"
           " */\n\n",
           argv0);

    /* The license text. */
    printf("/*\n"
           " * Copyright (C) 2006-2007 innotek GmbH\n"
           " *\n"
           " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
           " * available from http://www.virtualbox.org. This file is free software;\n"
           " * you can redistribute it and/or modify it under the terms of the GNU\n"
           " * General Public License as published by the Free Software Foundation,\n"
           " * in version 2 as it comes in the \"COPYING\" file of the VirtualBox OSE\n"
           " * distribution. VirtualBox OSE is distributed in the hope that it will\n"
           " * be useful, but WITHOUT ANY WARRANTY of any kind.\n"
           " *\n"
           " * If you received this file as part of a commercial VirtualBox\n"
           " * distribution, then only the terms of your commercial VirtualBox\n"
           " * license agreement apply instead of the previous paragraph.\n"
           " */\n"
           "\n");

    /* The include list. */
    printf("#include <iprt/uni.h>\n"
           "\n");
    return 0;
}
631
632
633/**
634 * Print the flag tables.
635 */
636int PrintFlags(void)
637{
638 /*
639 * Print flags table.
640 */
641 Stream2Init();
642 Stream2Printf("const RTUNIFLAGSRANGE g_aRTUniFlagRanges[] =\n"
643 "{\n");
644 RTUNICP i = 0;
645 int iStart = -1;
646 while (i < ELEMENTS(g_aCPInfo))
647 {
648 /* figure how far off the next chunk is */
649 char szFlags[256];
650 unsigned iNonNull = i;
651 while ( (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags))
652 && iNonNull < ELEMENTS(g_aCPInfo)
653 && iNonNull >= 256)
654 iNonNull++;
655 if (iNonNull - i > 4096 || iNonNull == ELEMENTS(g_aCPInfo))
656 {
657 if (iStart >= 0)
658 {
659 printf("};\n\n");
660 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
661 iStart = -1;
662 }
663 i = iNonNull;
664 }
665 else
666 {
667 if (iStart < 0)
668 {
669 printf("static const uint8_t g_afRTUniFlags0x%06x[] = \n"
670 "{\n", i);
671 iStart = i;
672 }
673 CalcFlags(&g_aCPInfo[i], szFlags);
674 printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
675 i++;
676 }
677 }
678 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
679 "};\n\n\n");
680 printf("\n");
681 return Stream2Flush();
682}
683
684
685/**
686 * Prints the upper case tables.
687 */
688static int PrintUpper(void)
689{
690 Stream2Init();
691 Stream2Printf("const RTUNICASERANGE g_aRTUniUpperRanges[] =\n"
692 "{\n");
693 RTUNICP i = 0;
694 int iStart = -1;
695 while (i < ELEMENTS(g_aCPInfo))
696 {
697 /* figure how far off the next chunk is */
698 unsigned iSameCase = i;
699 while ( g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
700 && iSameCase < ELEMENTS(g_aCPInfo)
701 && iSameCase >= 256)
702 iSameCase++;
703 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
704 {
705 if (iStart >= 0)
706 {
707 printf("};\n\n");
708 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
709 iStart = -1;
710 }
711 i = iSameCase;
712 }
713 else
714 {
715 if (iStart < 0)
716 {
717 printf("static const RTUNICP g_afRTUniUpper0x%06x[] = \n"
718 "{\n", i);
719 iStart = i;
720 }
721 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
722 i++;
723 }
724 }
725 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
726 "};\n\n\n");
727 printf("\n");
728 return Stream2Flush();
729}
730
731
732/**
733 * Prints the lowercase tables.
734 */
735static int PrintLower(void)
736{
737 Stream2Init();
738 Stream2Printf("const RTUNICASERANGE g_aRTUniLowerRanges[] =\n"
739 "{\n");
740 RTUNICP i = 0;
741 int iStart = -1;
742 while (i < ELEMENTS(g_aCPInfo))
743 {
744 /* figure how far off the next chunk is */
745 unsigned iSameCase = i;
746 while ( g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
747 && iSameCase < ELEMENTS(g_aCPInfo)
748 && iSameCase >= 256)
749 iSameCase++;
750 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
751 {
752 if (iStart >= 0)
753 {
754 printf("};\n\n");
755 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
756 iStart = -1;
757 }
758 i = iSameCase;
759 }
760 else
761 {
762 if (iStart < 0)
763 {
764 printf("static const RTUNICP g_afRTUniLower0x%06x[] = \n"
765 "{\n", i);
766 iStart = i;
767 }
768 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
769 i++;
770 }
771 }
772 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
773 "};\n\n\n");
774 printf("\n");
775 return Stream2Flush();
776}
777
778
779int main(int argc, char **argv)
780{
781 /*
782 * Parse args.
783 */
784 if (argc <= 1)
785 {
786 printf("usage: %s [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt]]]\n", argv[0]);
787 return 1;
788 }
789
790 const char *pszUnicodeData = "UnicodeData.txt";
791 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
792 const char *pszPropList = "PropList.txt";
793 int iFile = 0;
794 for (int argi = 1; argi < argc; argi++)
795 {
796 if (argv[argi][0] != '-')
797 {
798 switch (iFile++)
799 {
800 case 0: pszUnicodeData = argv[argi]; break;
801 case 1: pszDerivedCoreProperties = argv[argi]; break;
802 case 2: pszPropList = argv[argi]; break;
803 default:
804 printf("uniread: syntax error at '%s': too many filenames\n", argv[argi]);
805 return 1;
806 }
807 }
808 else
809 {
810 printf("uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
811 return 1;
812 }
813 }
814
815 /*
816 * Read the data.
817 */
818 int rc = ReadUnicodeData(pszUnicodeData);
819 if (rc)
820 return rc;
821 rc = ReadProperties(pszPropList);
822 if (rc)
823 return rc;
824 rc = ReadProperties(pszDerivedCoreProperties);
825 if (rc)
826 return rc;
827
828 /*
829 * Print stuff.
830 */
831 rc = PrintHeader(argv[0]);
832 if (rc)
833 return rc;
834 rc = PrintFlags();
835 if (rc)
836 return rc;
837 rc = PrintUpper();
838 if (rc)
839 return rc;
840 rc = PrintLower();
841 if (rc)
842 return rc;
843
844 /* done */
845 fflush(stdout);
846
847 return rc;
848}
849
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette