VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 99553

Last change on this file since 99553 was 98107, checked in by vboxsync, 23 months ago

Manual (C) year updates.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 42.3 KB
Line 
1/* $Id: uniread.cpp 98107 2023-01-17 22:56:50Z vboxsync $ */
2/** @file
3 * IPRT - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <iprt/types.h>
42#include <iprt/stdarg.h>
43#include <iprt/ctype.h>
44
45#include <stdio.h>
46#include <string.h>
47#include <stdlib.h>
48#ifdef _MSC_VER
49# include <direct.h>
50#else
51# include <unistd.h>
52#endif
53
54
55/*********************************************************************************************************************************
56* Global Variables *
57*********************************************************************************************************************************/
58/** The file we're currently parsing. */
59static const char *g_pszCurFile;
60/** The current line number. */
61static unsigned g_iLine;
62/** The current output file. */
63static FILE *g_pCurOutFile;
64
65
66/**
67 * Exit the program after printing a parse error.
68 *
69 * @param pszFormat The message.
70 * @param ... Format arguments.
71 */
72static DECL_NO_RETURN(void) ParseError(const char *pszFormat, ...)
73{
74 va_list va;
75 va_start(va, pszFormat);
76 fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
77 vfprintf(stderr, pszFormat, va);
78 va_end(va);
79 exit(1);
80}
81
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, everything from the first '#' on is
 * cut off, and trailing spaces, tabs, CRs and LFs are overwritten with
 * terminators.
 *
 * @returns Pointer to the first non-blank char (inside the input buffer).
 * @param   pszLine     The line string to strip.  Modified.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading blanks. */
    for (; *pszLine == ' ' || *pszLine == '\t'; pszLine++)
    { /* nothing */ }

    /* Cut off a trailing comment, or locate the terminator if there is none. */
    char *pszEnd = strchr(pszLine, '#');
    if (pszEnd)
        *pszEnd = '\0';
    else
        pszEnd = pszLine + strlen(pszLine);

    /* Trim trailing whitespace. */
    while (pszEnd > pszLine)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        *--pszEnd = '\0';
    }

    return pszLine;
}
113
114
/**
 * Checks whether a line holds nothing but whitespace and/or a '#' comment.
 *
 * @returns true if the first character after leading spaces, tabs, CRs and
 *          LFs is '#' or the string terminator, false otherwise.
 * @param   pszLine     The line to consider.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    char const chFirst = pszLine[strspn(pszLine, " \t\n\r")];
    return chFirst == '#' || chFirst == '\0';
}
126
127
/**
 * Splits the first ';'-separated field off a line, in place.
 *
 * The separator (if any) is replaced by a terminator and the field is stripped
 * of leading and trailing spaces, tabs, CRs and LFs.
 *
 * @returns Pointer to the stripped first field (inside the input buffer).
 * @param   ppsz        Where to store the pointer to the text following the
 *                      separator.  If there is no separator this is set to
 *                      the string terminator, i.e. an empty string.
 * @param   pszLine     The line string.  (Could also be *ppsz from a previous
 *                      FirstField/NextField call.)  Modified.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Terminate the field and hand back the continuation point. */
    char *pszEnd = strchr(pszLine, ';');
    if (pszEnd)
    {
        *pszEnd = '\0';
        *ppsz   = pszEnd + 1;
    }
    else
    {
        pszEnd = pszLine + strlen(pszLine);
        *ppsz  = pszEnd;
    }

    /* Strip leading whitespace ... */
    pszLine += strspn(pszLine, " \t\r\n");

    /* ... and trailing whitespace. */
    while (pszEnd > pszLine)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        *--pszEnd = '\0';
    }

    return pszLine;
}
164
165
166/**
167 * Get the next field in a field enumeration.
168 *
169 * @returns Pointer to the next field.
170 * @param ppsz Where to get and store the string position.
171 */
172static char *NextField(char **ppsz)
173{
174 return FirstField(ppsz, *ppsz);
175}
176
177
178/**
179 * Splits a decomposition field.
180 *
181 * This may start with a type that is enclosed in angle brackets.
182 *
183 * @returns Pointer to the mapping values following the type. @a *ppsz if empty.
184 * @param ppszType Pointer to the type field pointer. On input the type
185 * field contains the combined type and mapping string. On
186 * output this should only contain the type, no angle
187 * brackets. If no type specified, it is replaced with an
188 * empty string (const).
189 */
190static char *SplitDecompField(char **ppszType)
191{
192 /* Empty field? */
193 char *psz = *ppszType;
194 if (!*psz)
195 return psz;
196
197 /* No type? */
198 if (*psz != '<')
199 {
200 *ppszType = (char *)"";
201 return psz;
202 }
203
204 /* Split out the type. */
205 *ppszType = ++psz;
206 psz = strchr(psz, '>');
207 if (!psz)
208 {
209 ParseError("Bad Decomposition Type/Mappings\n");
210 /* not reached: return *ppszType; */
211 }
212 *psz++ = '\0';
213
214 psz = StripLine(psz);
215 if (!*psz)
216 ParseError("Missing decomposition mappings\n");
217 return psz;
218}
219
220/**
221 * Converts a code point field to a number.
222 * @returns Code point.
223 * @param psz The field string.
224 */
225static RTUNICP ToNum(const char *psz)
226{
227 char *pszEnd = NULL;
228 unsigned long ul = strtoul(psz, &pszEnd, 16);
229 if (pszEnd && *pszEnd)
230 ParseError("failed converting '%s' to a number!\n", psz);
231 return (RTUNICP)ul;
232}
233
234
235/**
236 * Same as ToNum except that if the field is empty the Default is returned.
237 */
238static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
239{
240 if (*psz)
241 return ToNum(psz);
242 return Default;
243}
244
245
246/**
247 * Converts a code point range to numbers.
248 * @returns The start code point.\
249 * @returns ~(RTUNICP)0 on failure.
250 * @param psz The field string.
251 * @param pLast Where to store the last code point in the range.
252 */
253static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
254{
255 char *pszEnd = NULL;
256 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
257 unsigned long ulLast = ulStart;
258 if (pszEnd && *pszEnd)
259 {
260 if (*pszEnd == '.')
261 {
262 while (*pszEnd == '.')
263 pszEnd++;
264 ulLast = strtoul(pszEnd, &pszEnd, 16);
265 if (pszEnd && *pszEnd)
266 {
267 ParseError("failed converting '%s' to a number!\n", psz);
268 /* not reached: return ~(RTUNICP)0;*/
269 }
270 }
271 else
272 {
273 ParseError("failed converting '%s' to a number!\n", psz);
274 /* not reached: return ~(RTUNICP)0; */
275 }
276 }
277 *pLast = (RTUNICP)ulLast;
278 return (RTUNICP)ulStart;
279
280}
281
282/**
283 * For converting the decomposition mappings field and similar.
284 *
285 * @returns Mapping array or NULL if none.
286 * @param psz The string to convert. Can be empty.
287 * @param pcEntries Where to store the number of entries.
288 * @param cMax The max number of entries.
289 */
290static PRTUNICP ToMapping(char *psz, unsigned *pcEntries, unsigned cMax)
291{
292 PRTUNICP paCps = NULL;
293 unsigned cAlloc = 0;
294 unsigned i = 0;
295
296 /* Convert the code points. */
297 while (psz)
298 {
299 /* skip leading spaces */
300 while (RT_C_IS_BLANK(*psz))
301 psz++;
302
303 /* the end? */
304 if (!*psz)
305 break;
306
307 /* room left? */
308 if (i >= cMax)
309 {
310 ParseError("Too many mappings.\n");
311 /* not reached: break; */
312 }
313 if (i >= cAlloc)
314 {
315 cAlloc += 4;
316 paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
317 if (!paCps)
318 {
319 fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
320 exit(1);
321 }
322 }
323
324 /* Find the end. */
325 char *pszThis = psz;
326 while (RT_C_IS_XDIGIT(*psz))
327 psz++;
328 if (*psz && !RT_C_IS_BLANK(*psz))
329 ParseError("Malformed mappings.\n");
330 if (*psz)
331 *psz++ = '\0';
332
333 /* Convert to number and add it. */
334 paCps[i++] = ToNum(pszThis);
335 }
336
337 *pcEntries = i;
338 return paCps;
339}
340
341
/**
 * Duplicates a string, saving memory for empty strings.
 *
 * An empty input is not copied; a shared constant empty string is returned
 * instead, so the result must not be freed or modified unconditionally.
 * Exits the program if the allocation fails.
 *
 * @returns Pointer to the string copy (or the shared empty string).
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (*pszStr == '\0')
        return (char *)"";

    char *pszCopy = strdup(pszStr);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    return pszCopy;
}
359
360
361/**
362 * Array of all possible and impossible unicode code points as of 4.1
363 */
364struct CPINFO
365{
366 RTUNICP CodePoint;
367 RTUNICP SimpleUpperCaseMapping;
368 RTUNICP SimpleLowerCaseMapping;
369 RTUNICP SimpleTitleCaseMapping;
370 unsigned CanonicalCombiningClass;
371 const char *pszDecompositionType;
372 unsigned cDecompositionMapping;
373 PRTUNICP paDecompositionMapping;
374 const char *pszName;
375 /** Set if this is an unused entry */
376 unsigned fNullEntry : 1;
377
378 unsigned fAlphabetic : 1;
379 unsigned fASCIIHexDigit : 1;
380 unsigned fBidiControl : 1;
381 unsigned fCaseIgnorable : 1;
382 unsigned fCased : 1;
383 unsigned fChangesWhenCasefolded : 1;
384 unsigned fChangesWhenCasemapped : 1;
385 unsigned fChangesWhenLowercased : 1;
386 unsigned fChangesWhenTitlecased : 1;
387 unsigned fChangesWhenUppercased : 1;
388 unsigned fDash : 1;
389 unsigned fDefaultIgnorableCodePoint : 1;
390 unsigned fDeprecated : 1;
391 unsigned fDiacritic : 1;
392 unsigned fExtender : 1;
393 unsigned fGraphemeBase : 1;
394 unsigned fGraphemeExtend : 1;
395 unsigned fGraphemeLink : 1;
396 unsigned fHexDigit : 1;
397 unsigned fHyphen : 1;
398 unsigned fIDContinue : 1;
399 unsigned fIdeographic : 1;
400 unsigned fIDSBinaryOperator : 1;
401 unsigned fIDStart : 1;
402 unsigned fIDSTrinaryOperator : 1;
403 unsigned fJoinControl : 1;
404 unsigned fLogicalOrderException : 1;
405 unsigned fLowercase : 1;
406 unsigned fMath : 1;
407 unsigned fNoncharacterCodePoint : 1;
408 unsigned fOtherAlphabetic : 1;
409 unsigned fOtherDefaultIgnorableCodePoint : 1;
410 unsigned fOtherGraphemeExtend : 1;
411 unsigned fOtherIDContinue : 1;
412 unsigned fOtherIDStart : 1;
413 unsigned fOtherLowercase : 1;
414 unsigned fOtherMath : 1;
415 unsigned fOtherUppercase : 1;
416 unsigned fPatternSyntax : 1;
417 unsigned fPatternWhiteSpace : 1;
418 unsigned fQuotationMark : 1;
419 unsigned fRadical : 1;
420 unsigned fSoftDotted : 1;
421 unsigned fSTerm : 1;
422 unsigned fTerminalPunctuation : 1;
423 unsigned fUnifiedIdeograph : 1;
424 unsigned fUppercase : 1;
425 unsigned fVariationSelector : 1;
426 unsigned fWhiteSpace : 1;
427 unsigned fXIDContinue : 1;
428 unsigned fXIDStart : 1;
429
430 /** @name DerivedNormalizationProps.txt
431 * @{ */
432 unsigned fFullCompositionExclusion : 1;
433 unsigned fInvNFC_QC : 2; /**< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. */
434 unsigned fInvNFD_QC : 2; /**< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. */
435 unsigned fInvNFKC_QC : 2;
436 unsigned fInvNFKD_QC : 2;
437 unsigned fExpandsOnNFC : 1;
438 unsigned fExpandsOnNFD : 1;
439 unsigned fExpandsOnNFKC : 1;
440 unsigned fExpandsOnNFKD : 1;
441 /** @} */
442
443 /* unprocessed stuff, so far. */
444 const char *pszGeneralCategory;
445 const char *pszBidiClass;
446 const char *pszNumericType;
447 const char *pszNumericValueD;
448 const char *pszNumericValueN;
449 const char *pszBidiMirrored;
450 const char *pszUnicode1Name;
451 const char *pszISOComment;
452} g_aCPInfo[0x110000];
453
454
455/**
456 * Creates a 'null' entry at i.
457 * @param i The entry in question.
458 */
459static void NullEntry(unsigned i)
460{
461 g_aCPInfo[i].CodePoint = i;
462 g_aCPInfo[i].fNullEntry = 1;
463 g_aCPInfo[i].SimpleUpperCaseMapping = i;
464 g_aCPInfo[i].SimpleLowerCaseMapping = i;
465 g_aCPInfo[i].SimpleTitleCaseMapping = i;
466 g_aCPInfo[i].pszDecompositionType = "";
467 g_aCPInfo[i].cDecompositionMapping = 0;
468 g_aCPInfo[i].paDecompositionMapping = NULL;
469 g_aCPInfo[i].pszName = "";
470 g_aCPInfo[i].pszGeneralCategory = "";
471 g_aCPInfo[i].pszBidiClass = "";
472 g_aCPInfo[i].pszNumericType = "";
473 g_aCPInfo[i].pszNumericValueD = "";
474 g_aCPInfo[i].pszNumericValueN = "";
475 g_aCPInfo[i].pszBidiMirrored = "";
476 g_aCPInfo[i].pszUnicode1Name = "";
477 g_aCPInfo[i].pszISOComment = "";
478}
479
480
481/**
482 * Open a file for reading, optionally with a base path prefixed.
483 *
484 * @returns file stream on success, NULL w/ complaint on failure.
485 * @param pszBasePath The base path, can be NULL.
486 * @param pszFilename The name of the file to open.
487 */
488static FILE *OpenFile(const char *pszBasePath, const char *pszFilename)
489{
490 FILE *pFile;
491 if ( !pszBasePath
492 || *pszFilename == '/'
493#if defined(_MSC_VER) || defined(__OS2__)
494 || *pszFilename == '\\'
495 || (*pszFilename && pszFilename[1] == ':')
496#endif
497 )
498 {
499 pFile = fopen(pszFilename, "r");
500 if (!pFile)
501 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
502 }
503 else
504 {
505 size_t cchBasePath = strlen(pszBasePath);
506 size_t cchFilename = strlen(pszFilename);
507 char *pszFullName = (char *)malloc(cchBasePath + 1 + cchFilename + 1);
508 if (!pszFullName)
509 {
510 fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
511 return NULL;
512 }
513
514 memcpy(pszFullName, pszBasePath, cchBasePath);
515 pszFullName[cchBasePath] = '/';
516 memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
517
518 pFile = fopen(pszFullName, "r");
519 if (!pFile)
520 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
521 free(pszFullName);
522 }
523 g_pszCurFile = pszFilename;
524 g_iLine = 0;
525 return pFile;
526}
527
528
529/**
530 * Wrapper around fgets that keep track of the line number.
531 *
532 * @returns See fgets.
533 * @param pszBuf The buffer. See fgets for output definition.
534 * @param cbBuf The buffer size.
535 * @param pFile The file to read from.
536 */
537static char *GetLineFromFile(char *pszBuf, int cbBuf, FILE *pFile)
538{
539 g_iLine++;
540 return fgets(pszBuf, cbBuf, pFile);
541}
542
543
544/**
545 * Closes a file opened by OpenFile
546 *
547 * @param pFile The file to close.
548 */
549static void CloseFile(FILE *pFile)
550{
551 g_pszCurFile = NULL;
552 g_iLine = 0;
553 fclose(pFile);
554}
555
556
557/**
558 * Read the UnicodeData.txt file.
559 * @returns 0 on success.
560 * @returns !0 on failure.
561 * @param pszBasePath The base path, can be NULL.
562 * @param pszFilename The name of the file.
563 */
564static int ReadUnicodeData(const char *pszBasePath, const char *pszFilename)
565{
566 /*
567 * Open input.
568 */
569 FILE *pFile = OpenFile(pszBasePath, pszFilename);
570 if (!pFile)
571 return 1;
572
573 /*
574 * Parse the input and spit out the output.
575 */
576 char szLine[4096];
577 RTUNICP i = 0;
578 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
579 {
580 if (IsCommentOrBlankLine(szLine))
581 continue;
582
583 char *pszCurField;
584 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
585 char *pszName = NextField(&pszCurField); /* 1 */
586 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
587 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
588 char *pszBidiClass = NextField(&pszCurField); /* 4 */
589 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
590 char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
591 char *pszNumericType = NextField(&pszCurField); /* 6 */
592 char *pszNumericValueD = NextField(&pszCurField); /* 7 */
593 char *pszNumericValueN = NextField(&pszCurField); /* 8 */
594 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
595 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
596 char *pszISOComment = NextField(&pszCurField); /* 11 */
597 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
598 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
599 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
600
601 RTUNICP CodePoint = ToNum(pszCodePoint);
602 if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
603 {
604 ParseError("U+05X is out of range\n", CodePoint);
605 /* not reached: continue;*/
606 }
607
608 /* catchup? */
609 while (i < CodePoint)
610 NullEntry(i++);
611 if (i != CodePoint)
612 {
613 ParseError("i=%d CodePoint=%u\n", i, CodePoint);
614 /* not reached: CloseFile(pFile);
615 return 1; */
616 }
617
618 /* this one */
619 g_aCPInfo[i].CodePoint = i;
620 g_aCPInfo[i].fNullEntry = 0;
621 g_aCPInfo[i].pszName = DupStr(pszName);
622 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
623 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
624 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
625 g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
626 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
627 g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
628 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
629 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
630 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
631 g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
632 g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
633 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
634 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
635 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
636 i++;
637 }
638
639 /* catchup? */
640 while (i < RT_ELEMENTS(g_aCPInfo))
641 NullEntry(i++);
642 CloseFile(pFile);
643
644 return 0;
645}
646
647
648/**
649 * Generates excluded data.
650 *
651 * @returns 0 on success, exit code on failure.
652 */
653static int GenerateExcludedData(void)
654{
655 /*
656 * Hangul Syllables U+AC00 to U+D7A3.
657 */
658 for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
659 {
660 g_aCPInfo[i].fNullEntry = 0;
661 g_aCPInfo[i].fInvNFD_QC = 1;
662 /** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
663 * */
664 }
665
666 /** @todo
667 * CJK Ideographs Extension A (U+3400 - U+4DB5)
668 * CJK Ideographs (U+4E00 - U+9FA5)
669 * CJK Ideograph Extension B (U+20000 - U+2A6D6)
670 * CJK Ideograph Extension C (U+2A700 - U+2B734)
671 */
672
673 return 0;
674}
675
676
677
678/**
679 * Worker for ApplyProperty that handles a yes, no, maybe property value.
680 *
681 * @returns 0 (NO), 1 (YES), 2 (MAYBE).
682 * @param ppszNextField The field cursor, input and output.
683 */
684static int YesNoMaybePropertyValue(char **ppszNextField)
685{
686 if (!**ppszNextField)
687 ParseError("Missing Y/N/M field\n");
688 else
689 {
690 char *psz = NextField(ppszNextField);
691 if (!strcmp(psz, "N"))
692 return 0;
693 if (!strcmp(psz, "Y"))
694 return 1;
695 if (!strcmp(psz, "M"))
696 return 2;
697 ParseError("Unexpected Y/N/M value: '%s'\n", psz);
698 }
699 /* not reached: return 0; */
700}
701
702
703/**
704 * Inverted version of YesNoMaybePropertyValue
705 *
706 * @returns 1 (NO), 0 (YES), 2 (MAYBE).
707 * @param ppszNextField The field cursor, input and output.
708 */
709static int YesNoMaybePropertyValueInv(char **ppszNextField)
710{
711 unsigned rc = YesNoMaybePropertyValue(ppszNextField);
712 switch (rc)
713 {
714 case 0: return 1;
715 case 1: return 0;
716 default: return rc;
717 }
718}
719
720
721/**
722 * Applies a property to a code point.
723 *
724 * @param StartCP The code point.
725 * @param pszProperty The property name.
726 * @param pszNextField The next field.
727 */
728static void ApplyProperty(RTUNICP StartCP, const char *pszProperty, char *pszNextField)
729{
730 if (StartCP >= RT_ELEMENTS(g_aCPInfo))
731 {
732 ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
733 /* not reached: return; */
734 }
735 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
736 /* string switch */
737 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
738 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
739 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
740 else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
741 else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
742 else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
743 else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
744 else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
745 else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
746 else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
747 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
748 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
749 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
750 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
751 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
752 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
753 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
754 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
755 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
756 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
757 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
758 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
759 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
760 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
761 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
762 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
763 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
764 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
765 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
766 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
767 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
768 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
769 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
770 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
771 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
772 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
773 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
774 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
775 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
776 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
777 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
778 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
779 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
780 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
781 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
782 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
783 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
784 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
785 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
786 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
787 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
788 /* DerivedNormalizationProps: */
789 else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
790 else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
791 else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
792 else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
793 else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
794 else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
795 else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
796 else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
797 else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
798 else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
799 else if (!strcmp(pszProperty, "NFKC_CF")) return; /*ignore */
800 else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /*ignore */
801 else
802 {
803 ParseError("Unknown property '%s'\n", pszProperty);
804 /* not reached: return; */
805 }
806
807 if (pszNextField && *pszNextField)
808 ParseError("Unexpected next field: '%s'\n", pszNextField);
809}
810
811
812/**
813 * Reads a property file.
814 *
815 * There are several property files, this code can read all
816 * of those but will only make use of the properties it recognizes.
817 *
818 * @returns 0 on success.
819 * @returns !0 on failure.
820 * @param pszBasePath The base path, can be NULL.
821 * @param pszFilename The name of the file.
822 */
823static int ReadProperties(const char *pszBasePath, const char *pszFilename)
824{
825 /*
826 * Open input.
827 */
828 FILE *pFile = OpenFile(pszBasePath, pszFilename);
829 if (!pFile)
830 return 1;
831
832 /*
833 * Parse the input and spit out the output.
834 */
835 char szLine[4096];
836 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
837 {
838 if (IsCommentOrBlankLine(szLine))
839 continue;
840 char *pszCurField;
841 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
842 char *pszProperty = NextField(&pszCurField);
843 if (!*pszProperty)
844 {
845 ParseError("no property field.\n");
846 /* not reached: continue; */
847 }
848
849 RTUNICP LastCP;
850 RTUNICP StartCP = ToRange(pszRange, &LastCP);
851 if (StartCP == ~(RTUNICP)0)
852 continue;
853
854 while (StartCP <= LastCP)
855 ApplyProperty(StartCP++, pszProperty, pszCurField);
856 }
857
858 CloseFile(pFile);
859
860 return 0;
861}
862
863
/**
 * Appends a flag to the string, separating it from any existing content
 * with " | ".
 *
 * No buffer size is passed, so the caller must make sure the buffer has room
 * for the addition.
 *
 * @returns @a psz.
 * @param   psz         The string to append to.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    size_t const cchCur = strlen(psz);
    char        *pszDst = psz + cchCur;
    if (cchCur > 0)
    {
        memcpy(pszDst, " | ", 3);
        pszDst += 3;
    }
    strcpy(pszDst, pszFlag);
    return psz;
}
879
880/**
881 * Calcs the flags for a code point.
882 * @returns true if there is a flag.
883 * @returns false if the isn't.
884 */
885static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
886{
887 pszFlags[0] = '\0';
888 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
889 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
890 AppendFlag(pszFlags, "RTUNI_ALPHA");
891 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
892 AppendFlag(pszFlags, "RTUNI_XDIGIT");
893 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
894 AppendFlag(pszFlags, "RTUNI_DDIGIT");
895 if (pInfo->fWhiteSpace)
896 AppendFlag(pszFlags, "RTUNI_WSPACE");
897 if (pInfo->fUppercase || pInfo->fOtherUppercase)
898 AppendFlag(pszFlags, "RTUNI_UPPER");
899 if (pInfo->fLowercase || pInfo->fOtherLowercase)
900 AppendFlag(pszFlags, "RTUNI_LOWER");
901 //if (pInfo->???)
902 // AppendFlag(pszFlags, "RTUNI_BSPACE");
903#if 0
904 if (pInfo->fInvNFD_QC != 0 || pInfo->fInvNFC_QC != 0)
905 {
906 AppendFlag(pszFlags, "RTUNI_QC_NFX");
907 if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
908 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
909 else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
910 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
911 }
912 else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
913 fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
914#endif
915
916 if (!*pszFlags)
917 {
918 pszFlags[0] = '0';
919 pszFlags[1] = '\0';
920 return false;
921 }
922 return true;
923}
924
925
926/**
927 * Closes the primary output stream.
928 */
929static int Stream1Close(void)
930{
931 if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
932 {
933 if (fclose(g_pCurOutFile) != 0)
934 {
935 fprintf(stderr, "Error closing output file.\n");
936 return -1;
937 }
938 }
939 g_pCurOutFile = NULL;
940 return 0;
941}
942
943
944/**
945 * Initializes the 1st stream to output to a given file.
946 */
947static int Stream1Init(const char *pszName)
948{
949 int rc = Stream1Close();
950 if (!rc)
951 {
952 g_pCurOutFile = fopen(pszName, "w");
953 if (!g_pCurOutFile)
954 {
955 fprintf(stderr, "Error opening output file '%s'.\n", pszName);
956 rc = -1;
957 }
958 }
959 return rc;
960}
961
962
963/**
964 * printf wrapper for the primary output stream.
965 *
966 * @returns See vfprintf.
967 * @param pszFormat The vfprintf format string.
968 * @param ... The format arguments.
969 */
970static int Stream1Printf(const char *pszFormat, ...)
971{
972 int cch;
973 va_list va;
974 va_start(va, pszFormat);
975 cch = vfprintf(g_pCurOutFile, pszFormat, va);
976 va_end(va);
977 return cch;
978}
979
980
/** The data store for stream two. */
static char g_szStream2[10240];
/** The current write offset into g_szStream2. */
static unsigned volatile g_offStream2 = 0;

/**
 * Initializes the 2nd stream, discarding anything buffered in it.
 */
static void Stream2Init(void)
{
    g_offStream2   = 0;
    g_szStream2[0] = '\0';
}
993
994/**
995 * Flushes the 2nd stream to stdout.
996 */
997static int Stream2Flush(void)
998{
999 g_szStream2[g_offStream2] = '\0';
1000 Stream1Printf("%s", g_szStream2);
1001 Stream2Init();
1002 return 0;
1003}
1004
1005/**
1006 * printf to the 2nd stream.
1007 */
1008static int Stream2Printf(const char *pszFormat, ...)
1009{
1010 unsigned offStream2 = g_offStream2;
1011 va_list va;
1012 va_start(va, pszFormat);
1013 int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1014 va_end(va);
1015 offStream2 += cch;
1016 if (offStream2 >= sizeof(g_szStream2))
1017 {
1018 fprintf(stderr, "error: stream2 overflow!\n");
1019 exit(1);
1020 }
1021 g_offStream2 = offStream2;
1022 return cch;
1023}
1024
1025
1026/**
1027 * Print the unidata.cpp file header and include list.
1028 */
1029int PrintHeader(const char *argv0, const char *pszBaseDir)
1030{
1031 char szBuf[1024];
1032 if (!pszBaseDir)
1033 {
1034 memset(szBuf, 0, sizeof(szBuf));
1035#ifdef _MSC_VER
1036 if (!_getcwd(szBuf, sizeof(szBuf)))
1037#else
1038 if (!getcwd(szBuf, sizeof(szBuf)))
1039#endif
1040 return RTEXITCODE_FAILURE;
1041 pszBaseDir = szBuf;
1042 }
1043
1044 const char *pszYear = __DATE__;
1045 pszYear += strlen(pszYear) - 4;
1046
1047 Stream1Printf("/* $" "Id" "$ */\n"
1048 "/** @file\n"
1049 " * IPRT - Unicode Tables.\n"
1050 " *\n"
1051 " * Automatically Generated from %s\n"
1052 " * by %s (" __DATE__ " " __TIME__ ")\n"
1053 " */\n"
1054 "\n"
1055 "/*\n"
1056 " * Copyright (C) 2006-%s Oracle and/or its affiliates.\n"
1057 " *\n"
1058 " * This file is part of VirtualBox base platform packages, as\n"
1059 " * available from https://www.virtualbox.org.\n"
1060 " *\n"
1061 " * This program is free software; you can redistribute it and/or\n"
1062 " * modify it under the terms of the GNU General Public License\n"
1063 " * as published by the Free Software Foundation, in version 3 of the\n"
1064 " * License.\n"
1065 " *\n"
1066 " * This program is distributed in the hope that it will be useful, but\n"
1067 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
1068 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
1069 " * General Public License for more details.\n"
1070 " *\n"
1071 " * You should have received a copy of the GNU General Public License\n"
1072 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
1073 " *\n"
1074 " * The contents of this file may alternatively be used under the terms\n"
1075 " * of the Common Development and Distribution License Version 1.0\n"
1076 " * (CDDL), a copy of it is provided in the \"COPYING.CDDL\" file included\n"
1077 " * in the VirtualBox distribution, in which case the provisions of the\n"
1078 " * CDDL are applicable instead of those of the GPL.\n"
1079 " *\n"
1080 " * You may elect to license modified versions of this file under the\n"
1081 " * terms and conditions of either the GPL or the CDDL or both.\n"
1082 " *\n"
1083 " * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0\n"
1084 " */\n"
1085 "\n"
1086 "#include <iprt/uni.h>\n"
1087 "\n",
1088 pszBaseDir, argv0, pszYear);
1089 return 0;
1090}
1091
1092
1093/**
1094 * Print the flag tables.
1095 */
1096int PrintFlags(void)
1097{
1098 /*
1099 * Print flags table.
1100 */
1101 Stream2Init();
1102 Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1103 "{\n");
1104 RTUNICP i = 0;
1105 int iStart = -1;
1106 while (i < RT_ELEMENTS(g_aCPInfo))
1107 {
1108 /* figure how far off the next chunk is */
1109 char szFlags[256];
1110 unsigned iNonNull = i;
1111 while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1112 && iNonNull >= 256
1113 && (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1114 iNonNull++;
1115 if (iNonNull - i > 4096 || iNonNull == RT_ELEMENTS(g_aCPInfo))
1116 {
1117 if (iStart >= 0)
1118 {
1119 Stream1Printf("};\n\n");
1120 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1121 iStart = -1;
1122 }
1123 i = iNonNull;
1124 }
1125 else
1126 {
1127 if (iStart < 0)
1128 {
1129 Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1130 "{\n", i);
1131 iStart = i;
1132 }
1133 CalcFlags(&g_aCPInfo[i], szFlags);
1134 Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1135 i++;
1136 }
1137 }
1138 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1139 "};\n\n\n");
1140 Stream1Printf("\n");
1141 return Stream2Flush();
1142}
1143
1144
1145/**
1146 * Prints the upper case tables.
1147 */
1148static int PrintUpper(void)
1149{
1150 Stream2Init();
1151 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1152 "{\n");
1153 RTUNICP i = 0;
1154 int iStart = -1;
1155 while (i < RT_ELEMENTS(g_aCPInfo))
1156 {
1157 /* figure how far off the next chunk is */
1158 unsigned iSameCase = i;
1159 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1160 && g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1161 && iSameCase >= 256)
1162 iSameCase++;
1163 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1164 {
1165 if (iStart >= 0)
1166 {
1167 Stream1Printf("};\n\n");
1168 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1169 iStart = -1;
1170 }
1171 i = iSameCase;
1172 }
1173 else
1174 {
1175 if (iStart < 0)
1176 {
1177 Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1178 "{\n", i);
1179 iStart = i;
1180 }
1181 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1182 i++;
1183 }
1184 }
1185 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1186 "};\n\n\n");
1187 Stream1Printf("\n");
1188 return Stream2Flush();
1189}
1190
1191
1192/**
1193 * Prints the lowercase tables.
1194 */
1195static int PrintLower(void)
1196{
1197 Stream2Init();
1198 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1199 "{\n");
1200 RTUNICP i = 0;
1201 int iStart = -1;
1202 while (i < RT_ELEMENTS(g_aCPInfo))
1203 {
1204 /* figure how far off the next chunk is */
1205 unsigned iSameCase = i;
1206 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1207 && g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1208 && iSameCase >= 256)
1209 iSameCase++;
1210 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1211 {
1212 if (iStart >= 0)
1213 {
1214 Stream1Printf("};\n\n");
1215 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1216 iStart = -1;
1217 }
1218 i = iSameCase;
1219 }
1220 else
1221 {
1222 if (iStart < 0)
1223 {
1224 Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1225 "{\n", i);
1226 iStart = i;
1227 }
1228 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1229 g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1230 i++;
1231 }
1232 }
1233 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1234 "};\n\n\n");
1235 Stream1Printf("\n");
1236 return Stream2Flush();
1237}
1238
1239
1240int main(int argc, char **argv)
1241{
1242 /*
1243 * Parse args.
1244 */
1245 if (argc <= 1)
1246 {
1247 printf("usage: %s [-C|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1248 argv[0]);
1249 return 1;
1250 }
1251
1252 const char *pszBaseDir = NULL;
1253 const char *pszUnicodeData = "UnicodeData.txt";
1254 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1255 const char *pszPropList = "PropList.txt";
1256 const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1257 int iFile = 0;
1258 for (int argi = 1; argi < argc; argi++)
1259 {
1260 if (argv[argi][0] != '-')
1261 {
1262 switch (iFile++)
1263 {
1264 case 0: pszUnicodeData = argv[argi]; break;
1265 case 1: pszDerivedCoreProperties = argv[argi]; break;
1266 case 2: pszPropList = argv[argi]; break;
1267 case 3: pszDerivedNormalizationProps = argv[argi]; break;
1268 default:
1269 fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1270 return 1;
1271 }
1272 }
1273 else if ( !strcmp(argv[argi], "--dir")
1274 || !strcmp(argv[argi], "-C"))
1275 {
1276 if (argi + 1 >= argc)
1277 {
1278 fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1279 return 1;
1280 }
1281 argi++;
1282 pszBaseDir = argv[argi];
1283 }
1284 else
1285 {
1286 fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1287 return 1;
1288 }
1289 }
1290
1291 /*
1292 * Read the data.
1293 */
1294 int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1295 if (rc)
1296 return rc;
1297 rc = GenerateExcludedData();
1298 if (rc)
1299 return rc;
1300 rc = ReadProperties(pszBaseDir, pszPropList);
1301 if (rc)
1302 return rc;
1303 rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1304 if (rc)
1305 return rc;
1306 rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1307 if (rc)
1308 return rc;
1309
1310 /*
1311 * Produce output files.
1312 */
1313 rc = Stream1Init("unidata-flags.cpp");
1314 if (!rc)
1315 rc = PrintHeader(argv[0], pszBaseDir);
1316 if (!rc)
1317 rc = PrintFlags();
1318
1319 rc = Stream1Init("unidata-upper.cpp");
1320 if (!rc)
1321 rc = PrintHeader(argv[0], pszBaseDir);
1322 if (!rc)
1323 rc = PrintUpper();
1324
1325 rc = Stream1Init("unidata-lower.cpp");
1326 if (!rc)
1327 rc = PrintHeader(argv[0], pszBaseDir);
1328 if (!rc)
1329 rc = PrintLower();
1330 if (!rc)
1331 rc = Stream1Close();
1332
1333 /* done */
1334 return rc;
1335}
1336
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette