uniread.cpp@ 99553

Last change on this file since 99553 was 98107, checked in by vboxsync, 23 months ago
Manual (C) year updates.
Property svn:eol-style set to `native` Property svn:keywords set to `Id Revision`
File size: 42.3 KB

Line
1	/* $Id: uniread.cpp 98107 2023-01-17 22:56:50Z vboxsync $ */
2	/** @file
3	* IPRT - Unicode Specification Reader.
4	*/
5
6	/*
7	* Copyright (C) 2006-2023 Oracle and/or its affiliates.
8	*
9	* This file is part of VirtualBox base platform packages, as
10	* available from https://www.virtualbox.org.
11	*
12	* This program is free software; you can redistribute it and/or
13	* modify it under the terms of the GNU General Public License
14	* as published by the Free Software Foundation, in version 3 of the
15	* License.
16	*
17	* This program is distributed in the hope that it will be useful, but
18	* WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	* General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, see <https://www.gnu.org/licenses>.
24	*
25	* The contents of this file may alternatively be used under the terms
26	* of the Common Development and Distribution License Version 1.0
27	* (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	* in the VirtualBox distribution, in which case the provisions of the
29	* CDDL are applicable instead of those of the GPL.
30	*
31	* You may elect to license modified versions of this file under the
32	* terms and conditions of either the GPL or the CDDL or both.
33	*
34	* SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	*/
36
37
38	/*********************************************************************************************************************************
39	* Header Files *
40	*********************************************************************************************************************************/
41	#include <iprt/types.h>
42	#include <iprt/stdarg.h>
43	#include <iprt/ctype.h>
44
45	#include <stdio.h>
46	#include <string.h>
47	#include <stdlib.h>
48	#ifdef _MSC_VER
49	# include <direct.h>
50	#else
51	# include <unistd.h>
52	#endif
53
54
55	/*********************************************************************************************************************************
56	* Global Variables *
57	*********************************************************************************************************************************/
/** Name of the input file being parsed (printed by ParseError). */
static const char  *g_pszCurFile;
/** Line number within the input file being parsed (printed by ParseError). */
static unsigned     g_iLine;
/** The file the primary output stream writes to. */
static FILE        *g_pCurOutFile;
64
65
66	/**
67	* Exit the program after printing a parse error.
68	*
69	* @param pszFormat The message.
70	* @param ... Format arguments.
71	*/
72	static DECL_NO_RETURN(void) ParseError(const char *pszFormat, ...)
73	{
74	va_list va;
75	va_start(va, pszFormat);
76	fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
77	vfprintf(stderr, pszFormat, va);
78	va_end(va);
79	exit(1);
80	}
81
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, everything from the first '#' onwards
 * is cut off, and trailing blanks and line endings are removed.
 *
 * @returns Pointer to the first non-blank char (points into @a pszLine).
 * @param   pszLine     The line string to strip.  Modified in place.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading blanks. */
    while (*pszLine == ' ' || *pszLine == '\t')
        pszLine++;

    /* Cut at the comment marker, otherwise start trimming at the terminator. */
    char *psz = strchr(pszLine, '#');
    if (psz)
        *psz = '\0';
    else
        psz = strchr(pszLine, '\0');

    /* Trim trailing whitespace and line endings. */
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }

    return pszLine;
}
113
114
/**
 * Checks if the line is blank or a comment line and should be skipped.
 *
 * @returns true if the first character after any leading whitespace is '#' or
 *          the string terminator, false otherwise.
 * @param   pszLine     The line to consider.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\n' || *pszLine == '\r')
        pszLine++;
    return *pszLine == '#' || *pszLine == '\0';
}
126
127
/**
 * Get the first ';'-separated field in the string.
 *
 * The separator is overwritten with a terminator and the field is stripped of
 * surrounding whitespace in place.
 *
 * @returns Pointer to the (stripped) first field.
 * @param   ppsz        Where to store the pointer to the next field.  When
 *                      there is no further separator this points at the string
 *                      terminator.
 * @param   pszLine     The line string. (could also be *ppsz from a FirstNext call)
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    char *psz = strchr(pszLine, ';');
    if (!psz)
        *ppsz = psz = strchr(pszLine, '\0');
    else
    {
        *psz = '\0';
        *ppsz = psz + 1;
    }

    /* strip leading whitespace */
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\r' || *pszLine == '\n')
        pszLine++;

    /* strip trailing whitespace */
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }
    return pszLine;
}
164
165
166	/**
167	* Get the next field in a field enumeration.
168	*
169	* @returns Pointer to the next field.
170	* @param ppsz Where to get and store the string position.
171	*/
172	static char NextField(char *ppsz)
173	{
174	return FirstField(ppsz, *ppsz);
175	}
176
177
178	/**
179	* Splits a decomposition field.
180	*
181	* This may start with a type that is enclosed in angle brackets.
182	*
183	* @returns Pointer to the mapping values following the type. @a *ppsz if empty.
184	* @param ppszType Pointer to the type field pointer. On input the type
185	* field contains the combined type and mapping string. On
186	* output this should only contain the type, no angle
187	* brackets. If no type specified, it is replaced with an
188	* empty string (const).
189	*/
190	static char SplitDecompField(char *ppszType)
191	{
192	/* Empty field? */
193	char psz = ppszType;
194	if (!*psz)
195	return psz;
196
197	/* No type? */
198	if (*psz != '<')
199	{
200	ppszType = (char )"";
201	return psz;
202	}
203
204	/* Split out the type. */
205	*ppszType = ++psz;
206	psz = strchr(psz, '>');
207	if (!psz)
208	{
209	ParseError("Bad Decomposition Type/Mappings\n");
210	/* not reached: return ppszType; /
211	}
212	*psz++ = '\0';
213
214	psz = StripLine(psz);
215	if (!*psz)
216	ParseError("Missing decomposition mappings\n");
217	return psz;
218	}
219
220	/**
221	* Converts a code point field to a number.
222	* @returns Code point.
223	* @param psz The field string.
224	*/
225	static RTUNICP ToNum(const char *psz)
226	{
227	char *pszEnd = NULL;
228	unsigned long ul = strtoul(psz, &pszEnd, 16);
229	if (pszEnd && *pszEnd)
230	ParseError("failed converting '%s' to a number!\n", psz);
231	return (RTUNICP)ul;
232	}
233
234
235	/**
236	* Same as ToNum except that if the field is empty the Default is returned.
237	*/
238	static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
239	{
240	if (*psz)
241	return ToNum(psz);
242	return Default;
243	}
244
245
246	/**
247	* Converts a code point range to numbers.
248	* @returns The start code point.\
249	* @returns ~(RTUNICP)0 on failure.
250	* @param psz The field string.
251	* @param pLast Where to store the last code point in the range.
252	*/
253	static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
254	{
255	char *pszEnd = NULL;
256	unsigned long ulStart = strtoul(psz, &pszEnd, 16);
257	unsigned long ulLast = ulStart;
258	if (pszEnd && *pszEnd)
259	{
260	if (*pszEnd == '.')
261	{
262	while (*pszEnd == '.')
263	pszEnd++;
264	ulLast = strtoul(pszEnd, &pszEnd, 16);
265	if (pszEnd && *pszEnd)
266	{
267	ParseError("failed converting '%s' to a number!\n", psz);
268	/* not reached: return ~(RTUNICP)0;*/
269	}
270	}
271	else
272	{
273	ParseError("failed converting '%s' to a number!\n", psz);
274	/* not reached: return ~(RTUNICP)0; */
275	}
276	}
277	*pLast = (RTUNICP)ulLast;
278	return (RTUNICP)ulStart;
279
280	}
281
282	/**
283	* For converting the decomposition mappings field and similar.
284	*
285	* @returns Mapping array or NULL if none.
286	* @param psz The string to convert. Can be empty.
287	* @param pcEntries Where to store the number of entries.
288	* @param cMax The max number of entries.
289	*/
290	static PRTUNICP ToMapping(char psz, unsigned pcEntries, unsigned cMax)
291	{
292	PRTUNICP paCps = NULL;
293	unsigned cAlloc = 0;
294	unsigned i = 0;
295
296	/* Convert the code points. */
297	while (psz)
298	{
299	/* skip leading spaces */
300	while (RT_C_IS_BLANK(*psz))
301	psz++;
302
303	/* the end? */
304	if (!*psz)
305	break;
306
307	/* room left? */
308	if (i >= cMax)
309	{
310	ParseError("Too many mappings.\n");
311	/* not reached: break; */
312	}
313	if (i >= cAlloc)
314	{
315	cAlloc += 4;
316	paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
317	if (!paCps)
318	{
319	fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
320	exit(1);
321	}
322	}
323
324	/* Find the end. */
325	char *pszThis = psz;
326	while (RT_C_IS_XDIGIT(*psz))
327	psz++;
328	if (psz && !RT_C_IS_BLANK(psz))
329	ParseError("Malformed mappings.\n");
330	if (*psz)
331	*psz++ = '\0';
332
333	/* Convert to number and add it. */
334	paCps[i++] = ToNum(pszThis);
335	}
336
337	*pcEntries = i;
338	return paCps;
339	}
340
341
/**
 * Duplicate a string, optimize certain strings to save memory.
 *
 * Empty strings are not copied; a shared constant empty string is returned
 * instead (it must not be written to or freed).  Exits the program with a
 * message if the allocation fails.
 *
 * @returns Pointer to string copy.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (!*pszStr)
        return (char *)"";

    /* malloc + memcpy rather than strdup, which is not part of ISO C before C23. */
    size_t const cb  = strlen(pszStr) + 1;
    char        *psz = (char *)malloc(cb);
    if (psz)
        return (char *)memcpy(psz, pszStr, cb);

    fprintf(stderr, "out of memory!\n");
    exit(1);
}
359
360
361	/**
362	* Array of all possible and impossible unicode code points as of 4.1
363	*/
364	struct CPINFO
365	{
366	RTUNICP CodePoint;
367	RTUNICP SimpleUpperCaseMapping;
368	RTUNICP SimpleLowerCaseMapping;
369	RTUNICP SimpleTitleCaseMapping;
370	unsigned CanonicalCombiningClass;
371	const char *pszDecompositionType;
372	unsigned cDecompositionMapping;
373	PRTUNICP paDecompositionMapping;
374	const char *pszName;
375	/** Set if this is an unused entry */
376	unsigned fNullEntry : 1;
377
378	unsigned fAlphabetic : 1;
379	unsigned fASCIIHexDigit : 1;
380	unsigned fBidiControl : 1;
381	unsigned fCaseIgnorable : 1;
382	unsigned fCased : 1;
383	unsigned fChangesWhenCasefolded : 1;
384	unsigned fChangesWhenCasemapped : 1;
385	unsigned fChangesWhenLowercased : 1;
386	unsigned fChangesWhenTitlecased : 1;
387	unsigned fChangesWhenUppercased : 1;
388	unsigned fDash : 1;
389	unsigned fDefaultIgnorableCodePoint : 1;
390	unsigned fDeprecated : 1;
391	unsigned fDiacritic : 1;
392	unsigned fExtender : 1;
393	unsigned fGraphemeBase : 1;
394	unsigned fGraphemeExtend : 1;
395	unsigned fGraphemeLink : 1;
396	unsigned fHexDigit : 1;
397	unsigned fHyphen : 1;
398	unsigned fIDContinue : 1;
399	unsigned fIdeographic : 1;
400	unsigned fIDSBinaryOperator : 1;
401	unsigned fIDStart : 1;
402	unsigned fIDSTrinaryOperator : 1;
403	unsigned fJoinControl : 1;
404	unsigned fLogicalOrderException : 1;
405	unsigned fLowercase : 1;
406	unsigned fMath : 1;
407	unsigned fNoncharacterCodePoint : 1;
408	unsigned fOtherAlphabetic : 1;
409	unsigned fOtherDefaultIgnorableCodePoint : 1;
410	unsigned fOtherGraphemeExtend : 1;
411	unsigned fOtherIDContinue : 1;
412	unsigned fOtherIDStart : 1;
413	unsigned fOtherLowercase : 1;
414	unsigned fOtherMath : 1;
415	unsigned fOtherUppercase : 1;
416	unsigned fPatternSyntax : 1;
417	unsigned fPatternWhiteSpace : 1;
418	unsigned fQuotationMark : 1;
419	unsigned fRadical : 1;
420	unsigned fSoftDotted : 1;
421	unsigned fSTerm : 1;
422	unsigned fTerminalPunctuation : 1;
423	unsigned fUnifiedIdeograph : 1;
424	unsigned fUppercase : 1;
425	unsigned fVariationSelector : 1;
426	unsigned fWhiteSpace : 1;
427	unsigned fXIDContinue : 1;
428	unsigned fXIDStart : 1;
429
430	/** @name DerivedNormalizationProps.txt
431	* @{ */
432	unsigned fFullCompositionExclusion : 1;
433	unsigned fInvNFC_QC : 2; /*< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. /
434	unsigned fInvNFD_QC : 2; /*< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. /
435	unsigned fInvNFKC_QC : 2;
436	unsigned fInvNFKD_QC : 2;
437	unsigned fExpandsOnNFC : 1;
438	unsigned fExpandsOnNFD : 1;
439	unsigned fExpandsOnNFKC : 1;
440	unsigned fExpandsOnNFKD : 1;
441	/** @} */
442
443	/* unprocessed stuff, so far. */
444	const char *pszGeneralCategory;
445	const char *pszBidiClass;
446	const char *pszNumericType;
447	const char *pszNumericValueD;
448	const char *pszNumericValueN;
449	const char *pszBidiMirrored;
450	const char *pszUnicode1Name;
451	const char *pszISOComment;
452	} g_aCPInfo[0x110000];
453
454
455	/**
456	* Creates a 'null' entry at i.
457	* @param i The entry in question.
458	*/
459	static void NullEntry(unsigned i)
460	{
461	g_aCPInfo[i].CodePoint = i;
462	g_aCPInfo[i].fNullEntry = 1;
463	g_aCPInfo[i].SimpleUpperCaseMapping = i;
464	g_aCPInfo[i].SimpleLowerCaseMapping = i;
465	g_aCPInfo[i].SimpleTitleCaseMapping = i;
466	g_aCPInfo[i].pszDecompositionType = "";
467	g_aCPInfo[i].cDecompositionMapping = 0;
468	g_aCPInfo[i].paDecompositionMapping = NULL;
469	g_aCPInfo[i].pszName = "";
470	g_aCPInfo[i].pszGeneralCategory = "";
471	g_aCPInfo[i].pszBidiClass = "";
472	g_aCPInfo[i].pszNumericType = "";
473	g_aCPInfo[i].pszNumericValueD = "";
474	g_aCPInfo[i].pszNumericValueN = "";
475	g_aCPInfo[i].pszBidiMirrored = "";
476	g_aCPInfo[i].pszUnicode1Name = "";
477	g_aCPInfo[i].pszISOComment = "";
478	}
479
480
481	/**
482	* Open a file for reading, optionally with a base path prefixed.
483	*
484	* @returns file stream on success, NULL w/ complaint on failure.
485	* @param pszBasePath The base path, can be NULL.
486	* @param pszFilename The name of the file to open.
487	*/
488	static FILE OpenFile(const char pszBasePath, const char *pszFilename)
489	{
490	FILE *pFile;
491	if ( !pszBasePath
492	\|\| *pszFilename == '/'
493	#if defined(_MSC_VER) \|\| defined(__OS2__)
494	\|\| *pszFilename == '\\'
495	\|\| (*pszFilename && pszFilename[1] == ':')
496	#endif
497	)
498	{
499	pFile = fopen(pszFilename, "r");
500	if (!pFile)
501	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
502	}
503	else
504	{
505	size_t cchBasePath = strlen(pszBasePath);
506	size_t cchFilename = strlen(pszFilename);
507	char pszFullName = (char )malloc(cchBasePath + 1 + cchFilename + 1);
508	if (!pszFullName)
509	{
510	fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
511	return NULL;
512	}
513
514	memcpy(pszFullName, pszBasePath, cchBasePath);
515	pszFullName[cchBasePath] = '/';
516	memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
517
518	pFile = fopen(pszFullName, "r");
519	if (!pFile)
520	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
521	free(pszFullName);
522	}
523	g_pszCurFile = pszFilename;
524	g_iLine = 0;
525	return pFile;
526	}
527
528
529	/**
530	* Wrapper around fgets that keep track of the line number.
531	*
532	* @returns See fgets.
533	* @param pszBuf The buffer. See fgets for output definition.
534	* @param cbBuf The buffer size.
535	* @param pFile The file to read from.
536	*/
537	static char GetLineFromFile(char pszBuf, int cbBuf, FILE *pFile)
538	{
539	g_iLine++;
540	return fgets(pszBuf, cbBuf, pFile);
541	}
542
543
544	/**
545	* Closes a file opened by OpenFile
546	*
547	* @param pFile The file to close.
548	*/
549	static void CloseFile(FILE *pFile)
550	{
551	g_pszCurFile = NULL;
552	g_iLine = 0;
553	fclose(pFile);
554	}
555
556
557	/**
558	* Read the UnicodeData.txt file.
559	* @returns 0 on success.
560	* @returns !0 on failure.
561	* @param pszBasePath The base path, can be NULL.
562	* @param pszFilename The name of the file.
563	*/
564	static int ReadUnicodeData(const char pszBasePath, const char pszFilename)
565	{
566	/*
567	* Open input.
568	*/
569	FILE *pFile = OpenFile(pszBasePath, pszFilename);
570	if (!pFile)
571	return 1;
572
573	/*
574	* Parse the input and spit out the output.
575	*/
576	char szLine[4096];
577	RTUNICP i = 0;
578	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
579	{
580	if (IsCommentOrBlankLine(szLine))
581	continue;
582
583	char *pszCurField;
584	char pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); / 0 */
585	char pszName = NextField(&pszCurField); / 1 */
586	char pszGeneralCategory = NextField(&pszCurField); / 2 */
587	char pszCanonicalCombiningClass = NextField(&pszCurField); / 3 */
588	char pszBidiClass = NextField(&pszCurField); / 4 */
589	char pszDecompositionType = NextField(&pszCurField); / 5 */
590	char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
591	char pszNumericType = NextField(&pszCurField); / 6 */
592	char pszNumericValueD = NextField(&pszCurField); / 7 */
593	char pszNumericValueN = NextField(&pszCurField); / 8 */
594	char pszBidiMirrored = NextField(&pszCurField); / 9 */
595	char pszUnicode1Name = NextField(&pszCurField); / 10 */
596	char pszISOComment = NextField(&pszCurField); / 11 */
597	char pszSimpleUpperCaseMapping = NextField(&pszCurField); / 12 */
598	char pszSimpleLowerCaseMapping = NextField(&pszCurField); / 13 */
599	char pszSimpleTitleCaseMapping = NextField(&pszCurField); / 14 */
600
601	RTUNICP CodePoint = ToNum(pszCodePoint);
602	if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
603	{
604	ParseError("U+05X is out of range\n", CodePoint);
605	/* not reached: continue;*/
606	}
607
608	/* catchup? */
609	while (i < CodePoint)
610	NullEntry(i++);
611	if (i != CodePoint)
612	{
613	ParseError("i=%d CodePoint=%u\n", i, CodePoint);
614	/* not reached: CloseFile(pFile);
615	return 1; */
616	}
617
618	/* this one */
619	g_aCPInfo[i].CodePoint = i;
620	g_aCPInfo[i].fNullEntry = 0;
621	g_aCPInfo[i].pszName = DupStr(pszName);
622	g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
623	g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
624	g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
625	g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
626	g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
627	g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
628	g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
629	g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
630	g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
631	g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
632	g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
633	g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
634	g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
635	g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
636	i++;
637	}
638
639	/* catchup? */
640	while (i < RT_ELEMENTS(g_aCPInfo))
641	NullEntry(i++);
642	CloseFile(pFile);
643
644	return 0;
645	}
646
647
648	/**
649	* Generates excluded data.
650	*
651	* @returns 0 on success, exit code on failure.
652	*/
653	static int GenerateExcludedData(void)
654	{
655	/*
656	* Hangul Syllables U+AC00 to U+D7A3.
657	*/
658	for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
659	{
660	g_aCPInfo[i].fNullEntry = 0;
661	g_aCPInfo[i].fInvNFD_QC = 1;
662	/** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
663	* */
664	}
665
666	/** @todo
667	* CJK Ideographs Extension A (U+3400 - U+4DB5)
668	* CJK Ideographs (U+4E00 - U+9FA5)
669	* CJK Ideograph Extension B (U+20000 - U+2A6D6)
670	* CJK Ideograph Extension C (U+2A700 - U+2B734)
671	*/
672
673	return 0;
674	}
675
676
677
678	/**
679	* Worker for ApplyProperty that handles a yes, no, maybe property value.
680	*
681	* @returns 0 (NO), 1 (YES), 2 (MAYBE).
682	* @param ppszNextField The field cursor, input and output.
683	*/
684	static int YesNoMaybePropertyValue(char **ppszNextField)
685	{
686	if (!**ppszNextField)
687	ParseError("Missing Y/N/M field\n");
688	else
689	{
690	char *psz = NextField(ppszNextField);
691	if (!strcmp(psz, "N"))
692	return 0;
693	if (!strcmp(psz, "Y"))
694	return 1;
695	if (!strcmp(psz, "M"))
696	return 2;
697	ParseError("Unexpected Y/N/M value: '%s'\n", psz);
698	}
699	/* not reached: return 0; */
700	}
701
702
703	/**
704	* Inverted version of YesNoMaybePropertyValue
705	*
706	* @returns 1 (NO), 0 (YES), 2 (MAYBE).
707	* @param ppszNextField The field cursor, input and output.
708	*/
709	static int YesNoMaybePropertyValueInv(char **ppszNextField)
710	{
711	unsigned rc = YesNoMaybePropertyValue(ppszNextField);
712	switch (rc)
713	{
714	case 0: return 1;
715	case 1: return 0;
716	default: return rc;
717	}
718	}
719
720
721	/**
722	* Applies a property to a code point.
723	*
724	* @param StartCP The code point.
725	* @param pszProperty The property name.
726	* @param pszNextField The next field.
727	*/
728	static void ApplyProperty(RTUNICP StartCP, const char pszProperty, char pszNextField)
729	{
730	if (StartCP >= RT_ELEMENTS(g_aCPInfo))
731	{
732	ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
733	/* not reached: return; */
734	}
735	struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
736	/* string switch */
737	if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
738	else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
739	else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
740	else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
741	else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
742	else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
743	else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
744	else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
745	else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
746	else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
747	else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
748	else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
749	else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
750	else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
751	else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
752	else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
753	else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
754	else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
755	else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
756	else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
757	else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
758	else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
759	else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
760	else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
761	else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
762	else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
763	else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
764	else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
765	else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
766	else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
767	else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
768	else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
769	else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
770	else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
771	else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
772	else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
773	else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
774	else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
775	else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
776	else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
777	else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
778	else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
779	else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
780	else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
781	else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
782	else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
783	else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
784	else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
785	else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
786	else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
787	else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
788	/* DerivedNormalizationProps: */
789	else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
790	else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
791	else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
792	else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
793	else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
794	else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
795	else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
796	else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
797	else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
798	else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
799	else if (!strcmp(pszProperty, "NFKC_CF")) return; /ignore /
800	else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /ignore /
801	else
802	{
803	ParseError("Unknown property '%s'\n", pszProperty);
804	/* not reached: return; */
805	}
806
807	if (pszNextField && *pszNextField)
808	ParseError("Unexpected next field: '%s'\n", pszNextField);
809	}
810
811
812	/**
813	* Reads a property file.
814	*
815	* There are several property files, this code can read all
816	* of those but will only make use of the properties it recognizes.
817	*
818	* @returns 0 on success.
819	* @returns !0 on failure.
820	* @param pszBasePath The base path, can be NULL.
821	* @param pszFilename The name of the file.
822	*/
823	static int ReadProperties(const char pszBasePath, const char pszFilename)
824	{
825	/*
826	* Open input.
827	*/
828	FILE *pFile = OpenFile(pszBasePath, pszFilename);
829	if (!pFile)
830	return 1;
831
832	/*
833	* Parse the input and spit out the output.
834	*/
835	char szLine[4096];
836	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
837	{
838	if (IsCommentOrBlankLine(szLine))
839	continue;
840	char *pszCurField;
841	char *pszRange = FirstField(&pszCurField, StripLine(szLine));
842	char *pszProperty = NextField(&pszCurField);
843	if (!*pszProperty)
844	{
845	ParseError("no property field.\n");
846	/* not reached: continue; */
847	}
848
849	RTUNICP LastCP;
850	RTUNICP StartCP = ToRange(pszRange, &LastCP);
851	if (StartCP == ~(RTUNICP)0)
852	continue;
853
854	while (StartCP <= LastCP)
855	ApplyProperty(StartCP++, pszProperty, pszCurField);
856	}
857
858	CloseFile(pFile);
859
860	return 0;
861	}
862
863
/**
 * Append a flag to the string.
 *
 * Flags are joined with " | " so the result is an OR expression.  The caller
 * must make sure the buffer is large enough; no bounds checking is done here.
 *
 * @returns @a psz.
 * @param   psz         The flag string buffer to append to.
 * @param   pszFlag     The flag to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    char *pszEnd = strchr(psz, '\0');
    if (pszEnd != psz)
    {
        /* Not the first flag, add the separator. */
        *pszEnd++ = ' ';
        *pszEnd++ = '|';
        *pszEnd++ = ' ';
    }
    strcpy(pszEnd, pszFlag);
    return psz;
}
879
880	/**
881	* Calcs the flags for a code point.
882	* @returns true if there is a flag.
883	* @returns false if the isn't.
884	*/
885	static bool CalcFlags(struct CPINFO pInfo, char pszFlags)
886	{
887	pszFlags[0] = '\0';
888	/** @todo read the specs on this other vs standard stuff, and check out the finer points */
889	if (pInfo->fAlphabetic \|\| pInfo->fOtherAlphabetic)
890	AppendFlag(pszFlags, "RTUNI_ALPHA");
891	if (pInfo->fHexDigit \|\| pInfo->fASCIIHexDigit)
892	AppendFlag(pszFlags, "RTUNI_XDIGIT");
893	if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
894	AppendFlag(pszFlags, "RTUNI_DDIGIT");
895	if (pInfo->fWhiteSpace)
896	AppendFlag(pszFlags, "RTUNI_WSPACE");
897	if (pInfo->fUppercase \|\| pInfo->fOtherUppercase)
898	AppendFlag(pszFlags, "RTUNI_UPPER");
899	if (pInfo->fLowercase \|\| pInfo->fOtherLowercase)
900	AppendFlag(pszFlags, "RTUNI_LOWER");
901	//if (pInfo->???)
902	// AppendFlag(pszFlags, "RTUNI_BSPACE");
903	#if 0
904	if (pInfo->fInvNFD_QC != 0 \|\| pInfo->fInvNFC_QC != 0)
905	{
906	AppendFlag(pszFlags, "RTUNI_QC_NFX");
907	if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
908	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
909	else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
910	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
911	}
912	else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
913	fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
914	#endif
915
916	if (!*pszFlags)
917	{
918	pszFlags[0] = '0';
919	pszFlags[1] = '\0';
920	return false;
921	}
922	return true;
923	}
924
925
926	/**
927	* Closes the primary output stream.
928	*/
929	static int Stream1Close(void)
930	{
931	if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
932	{
933	if (fclose(g_pCurOutFile) != 0)
934	{
935	fprintf(stderr, "Error closing output file.\n");
936	return -1;
937	}
938	}
939	g_pCurOutFile = NULL;
940	return 0;
941	}
942
943
944	/**
945	* Initializes the 1st stream to output to a given file.
946	*/
947	static int Stream1Init(const char *pszName)
948	{
949	int rc = Stream1Close();
950	if (!rc)
951	{
952	g_pCurOutFile = fopen(pszName, "w");
953	if (!g_pCurOutFile)
954	{
955	fprintf(stderr, "Error opening output file '%s'.\n", pszName);
956	rc = -1;
957	}
958	}
959	return rc;
960	}
961
962
963	/**
964	* printf wrapper for the primary output stream.
965	*
966	* @returns See vfprintf.
967	* @param pszFormat The vfprintf format string.
968	* @param ... The format arguments.
969	*/
970	static int Stream1Printf(const char *pszFormat, ...)
971	{
972	int cch;
973	va_list va;
974	va_start(va, pszFormat);
975	cch = vfprintf(g_pCurOutFile, pszFormat, va);
976	va_end(va);
977	return cch;
978	}
979
980
/** The buffer holding the text of the 2nd stream. */
static char g_szStream2[10240];
/** The current write offset into g_szStream2. */
static unsigned volatile g_offStream2 = 0;
984
985	/**
986	* Initializes the 2nd steam.
987	*/
988	static void Stream2Init(void)
989	{
990	g_szStream2[0] = '\0';
991	g_offStream2 = 0;
992	}
993
994	/**
995	* Flushes the 2nd stream to stdout.
996	*/
997	static int Stream2Flush(void)
998	{
999	g_szStream2[g_offStream2] = '\0';
1000	Stream1Printf("%s", g_szStream2);
1001	Stream2Init();
1002	return 0;
1003	}
1004
1005	/**
1006	* printf to the 2nd stream.
1007	*/
1008	static int Stream2Printf(const char *pszFormat, ...)
1009	{
1010	unsigned offStream2 = g_offStream2;
1011	va_list va;
1012	va_start(va, pszFormat);
1013	int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1014	va_end(va);
1015	offStream2 += cch;
1016	if (offStream2 >= sizeof(g_szStream2))
1017	{
1018	fprintf(stderr, "error: stream2 overflow!\n");
1019	exit(1);
1020	}
1021	g_offStream2 = offStream2;
1022	return cch;
1023	}
1024
1025
1026	/**
1027	* Print the unidata.cpp file header and include list.
1028	*/
1029	int PrintHeader(const char argv0, const char pszBaseDir)
1030	{
1031	char szBuf[1024];
1032	if (!pszBaseDir)
1033	{
1034	memset(szBuf, 0, sizeof(szBuf));
1035	#ifdef _MSC_VER
1036	if (!_getcwd(szBuf, sizeof(szBuf)))
1037	#else
1038	if (!getcwd(szBuf, sizeof(szBuf)))
1039	#endif
1040	return RTEXITCODE_FAILURE;
1041	pszBaseDir = szBuf;
1042	}
1043
1044	const char *pszYear = __DATE__;
1045	pszYear += strlen(pszYear) - 4;
1046
1047	Stream1Printf("/* $" "Id" "$ */\n"
1048	"/** @file\n"
1049	" * IPRT - Unicode Tables.\n"
1050	" *\n"
1051	" * Automatically Generated from %s\n"
1052	" * by %s (" __DATE__ " " __TIME__ ")\n"
1053	" */\n"
1054	"\n"
1055	"/*\n"
1056	" * Copyright (C) 2006-%s Oracle and/or its affiliates.\n"
1057	" *\n"
1058	" * This file is part of VirtualBox base platform packages, as\n"
1059	" * available from https://www.virtualbox.org.\n"
1060	" *\n"
1061	" * This program is free software; you can redistribute it and/or\n"
1062	" * modify it under the terms of the GNU General Public License\n"
1063	" * as published by the Free Software Foundation, in version 3 of the\n"
1064	" * License.\n"
1065	" *\n"
1066	" * This program is distributed in the hope that it will be useful, but\n"
1067	" * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
1068	" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
1069	" * General Public License for more details.\n"
1070	" *\n"
1071	" * You should have received a copy of the GNU General Public License\n"
1072	" * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
1073	" *\n"
1074	" * The contents of this file may alternatively be used under the terms\n"
1075	" * of the Common Development and Distribution License Version 1.0\n"
1076	" * (CDDL), a copy of it is provided in the \"COPYING.CDDL\" file included\n"
1077	" * in the VirtualBox distribution, in which case the provisions of the\n"
1078	" * CDDL are applicable instead of those of the GPL.\n"
1079	" *\n"
1080	" * You may elect to license modified versions of this file under the\n"
1081	" * terms and conditions of either the GPL or the CDDL or both.\n"
1082	" *\n"
1083	" * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0\n"
1084	" */\n"
1085	"\n"
1086	"#include <iprt/uni.h>\n"
1087	"\n",
1088	pszBaseDir, argv0, pszYear);
1089	return 0;
1090	}
1091
1092
1093	/**
1094	* Print the flag tables.
1095	*/
1096	int PrintFlags(void)
1097	{
1098	/*
1099	* Print flags table.
1100	*/
1101	Stream2Init();
1102	Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1103	"{\n");
1104	RTUNICP i = 0;
1105	int iStart = -1;
1106	while (i < RT_ELEMENTS(g_aCPInfo))
1107	{
1108	/* figure how far off the next chunk is */
1109	char szFlags[256];
1110	unsigned iNonNull = i;
1111	while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1112	&& iNonNull >= 256
1113	&& (g_aCPInfo[iNonNull].fNullEntry \|\| !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1114	iNonNull++;
1115	if (iNonNull - i > 4096 \|\| iNonNull == RT_ELEMENTS(g_aCPInfo))
1116	{
1117	if (iStart >= 0)
1118	{
1119	Stream1Printf("};\n\n");
1120	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1121	iStart = -1;
1122	}
1123	i = iNonNull;
1124	}
1125	else
1126	{
1127	if (iStart < 0)
1128	{
1129	Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1130	"{\n", i);
1131	iStart = i;
1132	}
1133	CalcFlags(&g_aCPInfo[i], szFlags);
1134	Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1135	i++;
1136	}
1137	}
1138	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1139	"};\n\n\n");
1140	Stream1Printf("\n");
1141	return Stream2Flush();
1142	}
1143
1144
1145	/**
1146	* Prints the upper case tables.
1147	*/
1148	static int PrintUpper(void)
1149	{
1150	Stream2Init();
1151	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1152	"{\n");
1153	RTUNICP i = 0;
1154	int iStart = -1;
1155	while (i < RT_ELEMENTS(g_aCPInfo))
1156	{
1157	/* figure how far off the next chunk is */
1158	unsigned iSameCase = i;
1159	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1160	&& g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1161	&& iSameCase >= 256)
1162	iSameCase++;
1163	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1164	{
1165	if (iStart >= 0)
1166	{
1167	Stream1Printf("};\n\n");
1168	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1169	iStart = -1;
1170	}
1171	i = iSameCase;
1172	}
1173	else
1174	{
1175	if (iStart < 0)
1176	{
1177	Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1178	"{\n", i);
1179	iStart = i;
1180	}
1181	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1182	i++;
1183	}
1184	}
1185	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1186	"};\n\n\n");
1187	Stream1Printf("\n");
1188	return Stream2Flush();
1189	}
1190
1191
1192	/**
1193	* Prints the lowercase tables.
1194	*/
1195	static int PrintLower(void)
1196	{
1197	Stream2Init();
1198	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1199	"{\n");
1200	RTUNICP i = 0;
1201	int iStart = -1;
1202	while (i < RT_ELEMENTS(g_aCPInfo))
1203	{
1204	/* figure how far off the next chunk is */
1205	unsigned iSameCase = i;
1206	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1207	&& g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1208	&& iSameCase >= 256)
1209	iSameCase++;
1210	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1211	{
1212	if (iStart >= 0)
1213	{
1214	Stream1Printf("};\n\n");
1215	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1216	iStart = -1;
1217	}
1218	i = iSameCase;
1219	}
1220	else
1221	{
1222	if (iStart < 0)
1223	{
1224	Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1225	"{\n", i);
1226	iStart = i;
1227	}
1228	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1229	g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1230	i++;
1231	}
1232	}
1233	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1234	"};\n\n\n");
1235	Stream1Printf("\n");
1236	return Stream2Flush();
1237	}
1238
1239
1240	int main(int argc, char **argv)
1241	{
1242	/*
1243	* Parse args.
1244	*/
1245	if (argc <= 1)
1246	{
1247	printf("usage: %s [-C\|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1248	argv[0]);
1249	return 1;
1250	}
1251
1252	const char *pszBaseDir = NULL;
1253	const char *pszUnicodeData = "UnicodeData.txt";
1254	const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1255	const char *pszPropList = "PropList.txt";
1256	const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1257	int iFile = 0;
1258	for (int argi = 1; argi < argc; argi++)
1259	{
1260	if (argv[argi][0] != '-')
1261	{
1262	switch (iFile++)
1263	{
1264	case 0: pszUnicodeData = argv[argi]; break;
1265	case 1: pszDerivedCoreProperties = argv[argi]; break;
1266	case 2: pszPropList = argv[argi]; break;
1267	case 3: pszDerivedNormalizationProps = argv[argi]; break;
1268	default:
1269	fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1270	return 1;
1271	}
1272	}
1273	else if ( !strcmp(argv[argi], "--dir")
1274	\|\| !strcmp(argv[argi], "-C"))
1275	{
1276	if (argi + 1 >= argc)
1277	{
1278	fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1279	return 1;
1280	}
1281	argi++;
1282	pszBaseDir = argv[argi];
1283	}
1284	else
1285	{
1286	fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1287	return 1;
1288	}
1289	}
1290
1291	/*
1292	* Read the data.
1293	*/
1294	int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1295	if (rc)
1296	return rc;
1297	rc = GenerateExcludedData();
1298	if (rc)
1299	return rc;
1300	rc = ReadProperties(pszBaseDir, pszPropList);
1301	if (rc)
1302	return rc;
1303	rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1304	if (rc)
1305	return rc;
1306	rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1307	if (rc)
1308	return rc;
1309
1310	/*
1311	* Produce output files.
1312	*/
1313	rc = Stream1Init("unidata-flags.cpp");
1314	if (!rc)
1315	rc = PrintHeader(argv[0], pszBaseDir);
1316	if (!rc)
1317	rc = PrintFlags();
1318
1319	rc = Stream1Init("unidata-upper.cpp");
1320	if (!rc)
1321	rc = PrintHeader(argv[0], pszBaseDir);
1322	if (!rc)
1323	rc = PrintUpper();
1324
1325	rc = Stream1Init("unidata-lower.cpp");
1326	if (!rc)
1327	rc = PrintHeader(argv[0], pszBaseDir);
1328	if (!rc)
1329	rc = PrintLower();
1330	if (!rc)
1331	rc = Stream1Close();
1332
1333	/* done */
1334	return rc;
1335	}
1336

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 99553

Download in other formats: