uniread.cpp@ 57358

最後變更在這個檔案從57358是 57358,由 vboxsync 提交於 10 年前
*: scm cleanup run.
屬性 svn:eol-style 設為 `native` 屬性 svn:keywords 設為 `Id Revision`
檔案大小: 41.1 KB

行
1	/* $Id: uniread.cpp 57358 2015-08-14 15:16:38Z vboxsync $ */
2	/** @file
3	* IPRT - Unicode Specification Reader.
4	*/
5
6	/*
7	* Copyright (C) 2006-2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27
28	/*********************************************************************************************************************************
29	* Header Files *
30	*********************************************************************************************************************************/
31	#include <iprt/types.h>
32	#include <iprt/stdarg.h>
33	#include <iprt/ctype.h>
34
35	#include <stdio.h>
36	#include <string.h>
37	#include <stdlib.h>
38	#ifdef _MSC_VER
39	# include <direct.h>
40	#else
41	# include <unistd.h>
42	#endif
43
44
45	/*********************************************************************************************************************************
46	* Global Variables *
47	*********************************************************************************************************************************/
/** Name of the input file currently being parsed; printed by ParseError. */
static const char *g_pszCurFile = NULL;
/** Line number within the current input file; printed by ParseError. */
static unsigned    g_iLine = 0;
/** The output file the primary stream currently writes to. */
static FILE       *g_pCurOutFile = NULL;
54
55
56	/**
57	* Exit the program after printing a parse error.
58	*
59	* @param pszFormat The message.
60	* @param ... Format arguments.
61	*/
62	static void ParseError(const char *pszFormat, ...)
63	{
64	va_list va;
65	va_start(va, pszFormat);
66	fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
67	vfprintf(stderr, pszFormat, va);
68	va_end(va);
69	exit(1);
70	}
71
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, anything from the first '#' onwards is
 * cut off, and trailing spaces, tabs, CRs and LFs are removed.
 *
 * @returns Pointer to the first non-blank char (inside @a pszLine).
 * @param   pszLine     The line string to strip.  Modified in place.
 */
static char *StripLine(char *pszLine)
{
    while (*pszLine == ' ' || *pszLine == '\t')
        pszLine++;

    /* Terminate at the comment marker, or locate the end of the string. */
    char *psz = strchr(pszLine, '#');
    if (psz)
        *psz = '\0';
    else
        psz = strchr(pszLine, '\0');

    /* Trim trailing whitespace. */
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }

    return pszLine;
}
103
104
/**
 * Checks if the line is blank or a comment line and should be skipped.
 *
 * @returns true if the first non-whitespace character is '#' or the line
 *          contains only spaces, tabs, CRs and LFs; false otherwise.
 * @param   pszLine     The line to consider.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\n' || *pszLine == '\r')
        pszLine++;
    return *pszLine == '#' || *pszLine == '\0';
}
116
117
/**
 * Gets the first ';'-separated field in the string.
 *
 * The field is terminated in place and stripped of surrounding spaces, tabs,
 * CRs and LFs.
 *
 * @returns Pointer to the (stripped) first field.
 * @param   ppsz        Where to store the pointer to the next field.  When
 *                      there is no further ';' this points at the string
 *                      terminator.
 * @param   pszLine     The line string. (could also be *ppsz from a previous
 *                      FirstField/NextField call)
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    char *psz = strchr(pszLine, ';');
    if (!psz)
        *ppsz = psz = strchr(pszLine, '\0');
    else
    {
        *psz = '\0';
        *ppsz = psz + 1;
    }

    /* strip */
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\r' || *pszLine == '\n')
        pszLine++;
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }
    return pszLine;
}
154
155
156	/**
157	* Get the next field in a field enumeration.
158	*
159	* @returns Pointer to the next field.
160	* @param ppsz Where to get and store the string position.
161	*/
162	static char NextField(char *ppsz)
163	{
164	return FirstField(ppsz, *ppsz);
165	}
166
167
168	/**
169	* Splits a decomposition field.
170	*
171	* This may start with a type that is enclosed in angle brackets.
172	*
173	* @returns Pointer to the mapping values following the type. @a *ppsz if empty.
174	* @param ppszType Pointer to the type field pointer. On input the type
175	* field contains the combined type and mapping string. On
176	* output this should only contain the type, no angle
177	* brackets. If no type specified, it is replaced with an
178	* empty string (const).
179	*/
180	static char SplitDecompField(char *ppszType)
181	{
182	/* Empty field? */
183	char psz = ppszType;
184	if (!*psz)
185	return psz;
186
187	/* No type? */
188	if (*psz != '<')
189	{
190	ppszType = (char )"";
191	return psz;
192	}
193
194	/* Split out the type. */
195	*ppszType = ++psz;
196	psz = strchr(psz, '>');
197	if (!psz)
198	{
199	ParseError("Bad Decomposition Type/Mappings\n");
200	return *ppszType;
201	}
202	*psz++ = '\0';
203
204	psz = StripLine(psz);
205	if (!*psz)
206	ParseError("Missing decomposition mappings\n");
207	return psz;
208	}
209
210	/**
211	* Converts a code point field to a number.
212	* @returns Code point.
213	* @param psz The field string.
214	*/
215	static RTUNICP ToNum(const char *psz)
216	{
217	char *pszEnd = NULL;
218	unsigned long ul = strtoul(psz, &pszEnd, 16);
219	if (pszEnd && *pszEnd)
220	ParseError("failed converting '%s' to a number!\n", psz);
221	return (RTUNICP)ul;
222	}
223
224
225	/**
226	* Same as ToNum except that if the field is empty the Default is returned.
227	*/
228	static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
229	{
230	if (*psz)
231	return ToNum(psz);
232	return Default;
233	}
234
235
236	/**
237	* Converts a code point range to numbers.
238	* @returns The start code point.\
239	* @returns ~(RTUNICP)0 on failure.
240	* @param psz The field string.
241	* @param pLast Where to store the last code point in the range.
242	*/
243	static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
244	{
245	char *pszEnd = NULL;
246	unsigned long ulStart = strtoul(psz, &pszEnd, 16);
247	unsigned long ulLast = ulStart;
248	if (pszEnd && *pszEnd)
249	{
250	if (*pszEnd == '.')
251	{
252	while (*pszEnd == '.')
253	pszEnd++;
254	ulLast = strtoul(pszEnd, &pszEnd, 16);
255	if (pszEnd && *pszEnd)
256	{
257	ParseError("failed converting '%s' to a number!\n", psz);
258	return ~(RTUNICP)0;
259	}
260	}
261	else
262	{
263	ParseError("failed converting '%s' to a number!\n", psz);
264	return ~(RTUNICP)0;
265	}
266	}
267	*pLast = (RTUNICP)ulLast;
268	return (RTUNICP)ulStart;
269
270	}
271
272	/**
273	* For converting the decomposition mappings field and similar.
274	*
275	* @returns Mapping array or NULL if none.
276	* @param psz The string to convert. Can be empty.
277	* @param pcEntries Where to store the number of entries.
278	* @param cMax The max number of entries.
279	*/
280	static PRTUNICP ToMapping(char psz, unsigned pcEntries, unsigned cMax)
281	{
282	PRTUNICP paCps = NULL;
283	unsigned cAlloc = 0;
284	unsigned i = 0;
285
286	/* Convert the code points. */
287	while (psz)
288	{
289	/* skip leading spaces */
290	while (RT_C_IS_BLANK(*psz))
291	psz++;
292
293	/* the end? */
294	if (!*psz)
295	break;
296
297	/* room left? */
298	if (i >= cMax)
299	{
300	ParseError("Too many mappings.\n");
301	break;
302	}
303	if (i >= cAlloc)
304	{
305	cAlloc += 4;
306	paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
307	if (!paCps)
308	{
309	fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
310	exit(1);
311	}
312	}
313
314	/* Find the end. */
315	char *pszThis = psz;
316	while (RT_C_IS_XDIGIT(*psz))
317	psz++;
318	if (psz && !RT_C_IS_BLANK(psz))
319	ParseError("Malformed mappings.\n");
320	if (*psz)
321	*psz++ = '\0';
322
323	/* Convert to number and add it. */
324	paCps[i++] = ToNum(pszThis);
325	}
326
327	*pcEntries = i;
328	return paCps;
329	}
330
331
/**
 * Duplicates a string, optimizing certain strings to save memory.
 *
 * An empty input is not copied; a pointer to a constant empty string is
 * returned instead.  The process exits if memory cannot be allocated.
 *
 * @returns Pointer to string copy (heap), or to a constant "" for empty input.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (!*pszStr)
        return (char *)"";

    /* malloc + memcpy rather than strdup, which is not part of ISO C. */
    size_t const cb      = strlen(pszStr) + 1;
    char        *pszCopy = (char *)malloc(cb);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(pszCopy, pszStr, cb);
    return pszCopy;
}
349
350
351	/**
352	* Array of all possible and impossible unicode code points as of 4.1
353	*/
354	struct CPINFO
355	{
356	RTUNICP CodePoint;
357	RTUNICP SimpleUpperCaseMapping;
358	RTUNICP SimpleLowerCaseMapping;
359	RTUNICP SimpleTitleCaseMapping;
360	unsigned CanonicalCombiningClass;
361	const char *pszDecompositionType;
362	unsigned cDecompositionMapping;
363	PRTUNICP paDecompositionMapping;
364	const char *pszName;
365	/** Set if this is an unused entry */
366	unsigned fNullEntry : 1;
367
368	unsigned fAlphabetic : 1;
369	unsigned fASCIIHexDigit : 1;
370	unsigned fBidiControl : 1;
371	unsigned fCaseIgnorable : 1;
372	unsigned fCased : 1;
373	unsigned fChangesWhenCasefolded : 1;
374	unsigned fChangesWhenCasemapped : 1;
375	unsigned fChangesWhenLowercased : 1;
376	unsigned fChangesWhenTitlecased : 1;
377	unsigned fChangesWhenUppercased : 1;
378	unsigned fDash : 1;
379	unsigned fDefaultIgnorableCodePoint : 1;
380	unsigned fDeprecated : 1;
381	unsigned fDiacritic : 1;
382	unsigned fExtender : 1;
383	unsigned fGraphemeBase : 1;
384	unsigned fGraphemeExtend : 1;
385	unsigned fGraphemeLink : 1;
386	unsigned fHexDigit : 1;
387	unsigned fHyphen : 1;
388	unsigned fIDContinue : 1;
389	unsigned fIdeographic : 1;
390	unsigned fIDSBinaryOperator : 1;
391	unsigned fIDStart : 1;
392	unsigned fIDSTrinaryOperator : 1;
393	unsigned fJoinControl : 1;
394	unsigned fLogicalOrderException : 1;
395	unsigned fLowercase : 1;
396	unsigned fMath : 1;
397	unsigned fNoncharacterCodePoint : 1;
398	unsigned fOtherAlphabetic : 1;
399	unsigned fOtherDefaultIgnorableCodePoint : 1;
400	unsigned fOtherGraphemeExtend : 1;
401	unsigned fOtherIDContinue : 1;
402	unsigned fOtherIDStart : 1;
403	unsigned fOtherLowercase : 1;
404	unsigned fOtherMath : 1;
405	unsigned fOtherUppercase : 1;
406	unsigned fPatternSyntax : 1;
407	unsigned fPatternWhiteSpace : 1;
408	unsigned fQuotationMark : 1;
409	unsigned fRadical : 1;
410	unsigned fSoftDotted : 1;
411	unsigned fSTerm : 1;
412	unsigned fTerminalPunctuation : 1;
413	unsigned fUnifiedIdeograph : 1;
414	unsigned fUppercase : 1;
415	unsigned fVariationSelector : 1;
416	unsigned fWhiteSpace : 1;
417	unsigned fXIDContinue : 1;
418	unsigned fXIDStart : 1;
419
420	/** @name DerivedNormalizationProps.txt
421	* @{ */
422	unsigned fFullCompositionExclusion : 1;
423	unsigned fInvNFC_QC : 2; /*< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. /
424	unsigned fInvNFD_QC : 2; /*< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. /
425	unsigned fInvNFKC_QC : 2;
426	unsigned fInvNFKD_QC : 2;
427	unsigned fExpandsOnNFC : 1;
428	unsigned fExpandsOnNFD : 1;
429	unsigned fExpandsOnNFKC : 1;
430	unsigned fExpandsOnNFKD : 1;
431	/** @} */
432
433	/* unprocessed stuff, so far. */
434	const char *pszGeneralCategory;
435	const char *pszBidiClass;
436	const char *pszNumericType;
437	const char *pszNumericValueD;
438	const char *pszNumericValueN;
439	const char *pszBidiMirrored;
440	const char *pszUnicode1Name;
441	const char *pszISOComment;
442	} g_aCPInfo[0x110000];
443
444
445	/**
446	* Creates a 'null' entry at i.
447	* @param i The entry in question.
448	*/
449	static void NullEntry(unsigned i)
450	{
451	g_aCPInfo[i].CodePoint = i;
452	g_aCPInfo[i].fNullEntry = 1;
453	g_aCPInfo[i].SimpleUpperCaseMapping = i;
454	g_aCPInfo[i].SimpleLowerCaseMapping = i;
455	g_aCPInfo[i].SimpleTitleCaseMapping = i;
456	g_aCPInfo[i].pszDecompositionType = "";
457	g_aCPInfo[i].cDecompositionMapping = 0;
458	g_aCPInfo[i].paDecompositionMapping = NULL;
459	g_aCPInfo[i].pszName = "";
460	g_aCPInfo[i].pszGeneralCategory = "";
461	g_aCPInfo[i].pszBidiClass = "";
462	g_aCPInfo[i].pszNumericType = "";
463	g_aCPInfo[i].pszNumericValueD = "";
464	g_aCPInfo[i].pszNumericValueN = "";
465	g_aCPInfo[i].pszBidiMirrored = "";
466	g_aCPInfo[i].pszUnicode1Name = "";
467	g_aCPInfo[i].pszISOComment = "";
468	}
469
470
471	/**
472	* Open a file for reading, optionally with a base path prefixed.
473	*
474	* @returns file stream on success, NULL w/ complaint on failure.
475	* @param pszBasePath The base path, can be NULL.
476	* @param pszFilename The name of the file to open.
477	*/
478	static FILE OpenFile(const char pszBasePath, const char *pszFilename)
479	{
480	FILE *pFile;
481	if ( !pszBasePath
482	\|\| *pszFilename == '/'
483	#if defined(_MSC_VER) \|\| defined(__OS2__)
484	\|\| *pszFilename == '\\'
485	\|\| (*pszFilename && pszFilename[1] == ':')
486	#endif
487	)
488	{
489	pFile = fopen(pszFilename, "r");
490	if (!pFile)
491	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
492	}
493	else
494	{
495	size_t cchBasePath = strlen(pszBasePath);
496	size_t cchFilename = strlen(pszFilename);
497	char pszFullName = (char )malloc(cchBasePath + 1 + cchFilename + 1);
498	if (!pszFullName)
499	{
500	fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
501	return NULL;
502	}
503
504	memcpy(pszFullName, pszBasePath, cchBasePath);
505	pszFullName[cchBasePath] = '/';
506	memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
507
508	pFile = fopen(pszFullName, "r");
509	if (!pFile)
510	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
511	free(pszFullName);
512	}
513	g_pszCurFile = pszFilename;
514	g_iLine = 0;
515	return pFile;
516	}
517
518
519	/**
520	* Wrapper around fgets that keep track of the line number.
521	*
522	* @returns See fgets.
523	* @param pszBuf The buffer. See fgets for output definition.
524	* @param cbBuf The buffer size.
525	* @param pFile The file to read from.
526	*/
527	static char GetLineFromFile(char pszBuf, int cbBuf, FILE *pFile)
528	{
529	g_iLine++;
530	return fgets(pszBuf, cbBuf, pFile);
531	}
532
533
534	/**
535	* Closes a file opened by OpenFile
536	*
537	* @param pFile The file to close.
538	*/
539	static void CloseFile(FILE *pFile)
540	{
541	g_pszCurFile = NULL;
542	g_iLine = 0;
543	fclose(pFile);
544	}
545
546
547	/**
548	* Read the UnicodeData.txt file.
549	* @returns 0 on success.
550	* @returns !0 on failure.
551	* @param pszBasePath The base path, can be NULL.
552	* @param pszFilename The name of the file.
553	*/
554	static int ReadUnicodeData(const char pszBasePath, const char pszFilename)
555	{
556	/*
557	* Open input.
558	*/
559	FILE *pFile = OpenFile(pszBasePath, pszFilename);
560	if (!pFile)
561	return 1;
562
563	/*
564	* Parse the input and spit out the output.
565	*/
566	char szLine[4096];
567	RTUNICP i = 0;
568	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
569	{
570	if (IsCommentOrBlankLine(szLine))
571	continue;
572
573	char *pszCurField;
574	char pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); / 0 */
575	char pszName = NextField(&pszCurField); / 1 */
576	char pszGeneralCategory = NextField(&pszCurField); / 2 */
577	char pszCanonicalCombiningClass = NextField(&pszCurField); / 3 */
578	char pszBidiClass = NextField(&pszCurField); / 4 */
579	char pszDecompositionType = NextField(&pszCurField); / 5 */
580	char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
581	char pszNumericType = NextField(&pszCurField); / 6 */
582	char pszNumericValueD = NextField(&pszCurField); / 7 */
583	char pszNumericValueN = NextField(&pszCurField); / 8 */
584	char pszBidiMirrored = NextField(&pszCurField); / 9 */
585	char pszUnicode1Name = NextField(&pszCurField); / 10 */
586	char pszISOComment = NextField(&pszCurField); / 11 */
587	char pszSimpleUpperCaseMapping = NextField(&pszCurField); / 12 */
588	char pszSimpleLowerCaseMapping = NextField(&pszCurField); / 13 */
589	char pszSimpleTitleCaseMapping = NextField(&pszCurField); / 14 */
590
591	RTUNICP CodePoint = ToNum(pszCodePoint);
592	if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
593	{
594	ParseError("U+05X is out of range\n", CodePoint);
595	continue;
596	}
597
598	/* catchup? */
599	while (i < CodePoint)
600	NullEntry(i++);
601	if (i != CodePoint)
602	{
603	ParseError("i=%d CodePoint=%u\n", i, CodePoint);
604	CloseFile(pFile);
605	return 1;
606	}
607
608	/* this one */
609	g_aCPInfo[i].CodePoint = i;
610	g_aCPInfo[i].fNullEntry = 0;
611	g_aCPInfo[i].pszName = DupStr(pszName);
612	g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
613	g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
614	g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
615	g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
616	g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
617	g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
618	g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
619	g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
620	g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
621	g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
622	g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
623	g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
624	g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
625	g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
626	i++;
627	}
628
629	/* catchup? */
630	while (i < RT_ELEMENTS(g_aCPInfo))
631	NullEntry(i++);
632	CloseFile(pFile);
633
634	return 0;
635	}
636
637
638	/**
639	* Generates excluded data.
640	*
641	* @returns 0 on success, exit code on failure.
642	*/
643	static int GenerateExcludedData(void)
644	{
645	/*
646	* Hangul Syllables U+AC00 to U+D7A3.
647	*/
648	for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
649	{
650	g_aCPInfo[i].fNullEntry = 0;
651	g_aCPInfo[i].fInvNFD_QC = 1;
652	/** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
653	* */
654	}
655
656	/** @todo
657	* CJK Ideographs Extension A (U+3400 - U+4DB5)
658	* CJK Ideographs (U+4E00 - U+9FA5)
659	* CJK Ideograph Extension B (U+20000 - U+2A6D6)
660	* CJK Ideograph Extension C (U+2A700 - U+2B734)
661	*/
662
663	return 0;
664	}
665
666
667
668	/**
669	* Worker for ApplyProperty that handles a yes, no, maybe property value.
670	*
671	* @returns 0 (NO), 1 (YES), 2 (MAYBE).
672	* @param ppszNextField The field cursor, input and output.
673	*/
674	static int YesNoMaybePropertyValue(char **ppszNextField)
675	{
676	if (!**ppszNextField)
677	{
678	ParseError("Missing Y/N/M field\n");
679	return 0;
680	}
681	char *psz = NextField(ppszNextField);
682	if (!strcmp(psz, "N"))
683	return 0;
684	if (!strcmp(psz, "Y"))
685	return 1;
686	if (!strcmp(psz, "M"))
687	return 2;
688	ParseError("Unexpected Y/N/M value: '%s'\n", psz);
689	return 0;
690	}
691
692
693	/**
694	* Inverted version of YesNoMaybePropertyValue
695	*
696	* @returns 1 (NO), 0 (YES), 2 (MAYBE).
697	* @param ppszNextField The field cursor, input and output.
698	*/
699	static int YesNoMaybePropertyValueInv(char **ppszNextField)
700	{
701	unsigned rc = YesNoMaybePropertyValue(ppszNextField);
702	switch (rc)
703	{
704	case 0: return 1;
705	case 1: return 0;
706	default: return rc;
707	}
708	}
709
710
711	/**
712	* Applies a property to a code point.
713	*
714	* @param StartCP The code point.
715	* @param pszProperty The property name.
716	*/
717	static void ApplyProperty(RTUNICP StartCP, const char pszProperty, char pszNextField)
718	{
719	if (StartCP >= RT_ELEMENTS(g_aCPInfo))
720	{
721	ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
722	return;
723	}
724	struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
725	/* string switch */
726	if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
727	else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
728	else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
729	else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
730	else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
731	else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
732	else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
733	else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
734	else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
735	else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
736	else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
737	else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
738	else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
739	else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
740	else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
741	else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
742	else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
743	else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
744	else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
745	else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
746	else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
747	else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
748	else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
749	else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
750	else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
751	else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
752	else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
753	else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
754	else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
755	else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
756	else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
757	else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
758	else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
759	else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
760	else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
761	else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
762	else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
763	else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
764	else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
765	else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
766	else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
767	else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
768	else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
769	else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
770	else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
771	else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
772	else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
773	else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
774	else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
775	else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
776	else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
777	/* DerivedNormalizationProps: */
778	else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
779	else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
780	else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
781	else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
782	else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
783	else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
784	else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
785	else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
786	else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
787	else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
788	else if (!strcmp(pszProperty, "NFKC_CF")) return; /ignore /
789	else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /ignore /
790	else
791	{
792	ParseError("Unknown property '%s'\n", pszProperty);
793	return;
794	}
795
796	if (pszNextField && *pszNextField)
797	ParseError("Unexpected next field: '%s'\n", pszNextField);
798	}
799
800
801	/**
802	* Reads a property file.
803	*
804	* There are several property files, this code can read all
805	* of those but will only make use of the properties it recognizes.
806	*
807	* @returns 0 on success.
808	* @returns !0 on failure.
809	* @param pszBasePath The base path, can be NULL.
810	* @param pszFilename The name of the file.
811	*/
812	static int ReadProperties(const char pszBasePath, const char pszFilename)
813	{
814	/*
815	* Open input.
816	*/
817	FILE *pFile = OpenFile(pszBasePath, pszFilename);
818	if (!pFile)
819	return 1;
820
821	/*
822	* Parse the input and spit out the output.
823	*/
824	char szLine[4096];
825	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
826	{
827	if (IsCommentOrBlankLine(szLine))
828	continue;
829	char *pszCurField;
830	char *pszRange = FirstField(&pszCurField, StripLine(szLine));
831	char *pszProperty = NextField(&pszCurField);
832	if (!*pszProperty)
833	{
834	ParseError("no property field.\n");
835	continue;
836	}
837
838	RTUNICP LastCP;
839	RTUNICP StartCP = ToRange(pszRange, &LastCP);
840	if (StartCP == ~(RTUNICP)0)
841	continue;
842
843	while (StartCP <= LastCP)
844	ApplyProperty(StartCP++, pszProperty, pszCurField);
845	}
846
847	CloseFile(pFile);
848
849	return 0;
850	}
851
852
/**
 * Appends a flag to the string.
 *
 * Flags are joined with " | " so the result can be emitted as a C expression.
 * The caller must make sure the buffer is large enough; no bounds checking is
 * performed here.
 *
 * @returns @a psz.
 * @param   psz         The flag string buffer to append to.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    char *pszEnd = strchr(psz, '\0');
    if (pszEnd != psz)
    {
        /* Not the first flag, add a separator. */
        *pszEnd++ = ' ';
        *pszEnd++ = '|';
        *pszEnd++ = ' ';
    }
    strcpy(pszEnd, pszFlag);
    return psz;
}
868
869	/**
870	* Calcs the flags for a code point.
871	* @returns true if there is a flag.
872	* @returns false if the isn't.
873	*/
874	static bool CalcFlags(struct CPINFO pInfo, char pszFlags)
875	{
876	pszFlags[0] = '\0';
877	/** @todo read the specs on this other vs standard stuff, and check out the finer points */
878	if (pInfo->fAlphabetic \|\| pInfo->fOtherAlphabetic)
879	AppendFlag(pszFlags, "RTUNI_ALPHA");
880	if (pInfo->fHexDigit \|\| pInfo->fASCIIHexDigit)
881	AppendFlag(pszFlags, "RTUNI_XDIGIT");
882	if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
883	AppendFlag(pszFlags, "RTUNI_DDIGIT");
884	if (pInfo->fWhiteSpace)
885	AppendFlag(pszFlags, "RTUNI_WSPACE");
886	if (pInfo->fUppercase \|\| pInfo->fOtherUppercase)
887	AppendFlag(pszFlags, "RTUNI_UPPER");
888	if (pInfo->fLowercase \|\| pInfo->fOtherLowercase)
889	AppendFlag(pszFlags, "RTUNI_LOWER");
890	//if (pInfo->???)
891	// AppendFlag(pszFlags, "RTUNI_BSPACE");
892	#if 0
893	if (pInfo->fInvNFD_QC != 0 \|\| pInfo->fInvNFC_QC != 0)
894	{
895	AppendFlag(pszFlags, "RTUNI_QC_NFX");
896	if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
897	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
898	else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
899	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
900	}
901	else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
902	fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
903	#endif
904
905	if (!*pszFlags)
906	{
907	pszFlags[0] = '0';
908	pszFlags[1] = '\0';
909	return false;
910	}
911	return true;
912	}
913
914
915	/**
916	* Closes the primary output stream.
917	*/
918	static int Stream1Close(void)
919	{
920	if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
921	{
922	if (fclose(g_pCurOutFile) != 0)
923	{
924	fprintf(stderr, "Error closing output file.\n");
925	return -1;
926	}
927	}
928	g_pCurOutFile = NULL;
929	return 0;
930	}
931
932
933	/**
934	* Initializes the 1st stream to output to a given file.
935	*/
936	static int Stream1Init(const char *pszName)
937	{
938	int rc = Stream1Close();
939	if (!rc)
940	{
941	g_pCurOutFile = fopen(pszName, "w");
942	if (!g_pCurOutFile)
943	{
944	fprintf(stderr, "Error opening output file '%s'.\n", pszName);
945	rc = -1;
946	}
947	}
948	return rc;
949	}
950
951
952	/**
953	* printf wrapper for the primary output stream.
954	*
955	* @returns See vfprintf.
956	* @param pszFormat The vfprintf format string.
957	* @param ... The format arguments.
958	*/
959	static int Stream1Printf(const char *pszFormat, ...)
960	{
961	int cch;
962	va_list va;
963	va_start(va, pszFormat);
964	cch = vfprintf(g_pCurOutFile, pszFormat, va);
965	va_end(va);
966	return cch;
967	}
968
969
/** The data store for stream two. */
static char g_szStream2[10240];
/** Current write offset into g_szStream2. */
static unsigned volatile g_offStream2 = 0;

/**
 * Initializes the 2nd stream, i.e. empties its buffer.
 */
static void Stream2Init(void)
{
    g_offStream2   = 0;
    g_szStream2[0] = '\0';
}
982
983	/**
984	* Flushes the 2nd stream to stdout.
985	*/
986	static int Stream2Flush(void)
987	{
988	g_szStream2[g_offStream2] = '\0';
989	Stream1Printf("%s", g_szStream2);
990	Stream2Init();
991	return 0;
992	}
993
994	/**
995	* printf to the 2nd stream.
996	*/
997	static int Stream2Printf(const char *pszFormat, ...)
998	{
999	unsigned offStream2 = g_offStream2;
1000	va_list va;
1001	va_start(va, pszFormat);
1002	int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1003	va_end(va);
1004	offStream2 += cch;
1005	if (offStream2 >= sizeof(g_szStream2))
1006	{
1007	fprintf(stderr, "error: stream2 overflow!\n");
1008	exit(1);
1009	}
1010	g_offStream2 = offStream2;
1011	return cch;
1012	}
1013
1014
1015	/**
1016	* Print the unidata.cpp file header and include list.
1017	*/
1018	int PrintHeader(const char argv0, const char pszBaseDir)
1019	{
1020	char szBuf[1024];
1021	if (!pszBaseDir)
1022	{
1023	memset(szBuf, 0, sizeof(szBuf));
1024	#ifdef _MSC_VER
1025	_getcwd(szBuf, sizeof(szBuf));
1026	#else
1027	getcwd(szBuf, sizeof(szBuf));
1028	#endif
1029	pszBaseDir = szBuf;
1030	}
1031
1032	Stream1Printf("/* $" "Id" "$ */\n"
1033	"/** @file\n"
1034	" * IPRT - Unicode Tables.\n"
1035	" *\n"
1036	" * Automatically Generated from %s\n"
1037	" * by %s (" __DATE__ " " __TIME__ ")\n"
1038	" */\n"
1039	"\n"
1040	"/*\n"
1041	" * Copyright (C) 2006-2015 Oracle Corporation \n"
1042	" *\n"
1043	" * This file is part of VirtualBox Open Source Edition (OSE), as\n"
1044	" * available from http://www.alldomusa.eu.org. This file is free software;\n"
1045	" * you can redistribute it and/or modify it under the terms of the GNU\n"
1046	" * General Public License (GPL) as published by the Free Software\n"
1047	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
1048	" * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
1049	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
1050	" *\n"
1051	" * The contents of this file may alternatively be used under the terms\n"
1052	" * of the Common Development and Distribution License Version 1.0\n"
1053	" * (CDDL) only, as it comes in the \"COPYING.CDDL\" file of the\n"
1054	" * VirtualBox OSE distribution, in which case the provisions of the\n"
1055	" * CDDL are applicable instead of those of the GPL.\n"
1056	" *\n"
1057	" * You may elect to license modified versions of this file under the\n"
1058	" * terms and conditions of either the GPL or the CDDL or both.\n"
1059	" */\n"
1060	"\n"
1061	"#include <iprt/uni.h>\n"
1062	"\n",
1063	pszBaseDir, argv0);
1064	return 0;
1065	}
1066
1067
1068	/**
1069	* Print the flag tables.
1070	*/
1071	int PrintFlags(void)
1072	{
1073	/*
1074	* Print flags table.
1075	*/
1076	Stream2Init();
1077	Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1078	"{\n");
1079	RTUNICP i = 0;
1080	int iStart = -1;
1081	while (i < RT_ELEMENTS(g_aCPInfo))
1082	{
1083	/* figure how far off the next chunk is */
1084	char szFlags[256];
1085	unsigned iNonNull = i;
1086	while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1087	&& iNonNull >= 256
1088	&& (g_aCPInfo[iNonNull].fNullEntry \|\| !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1089	iNonNull++;
1090	if (iNonNull - i > 4096 \|\| iNonNull == RT_ELEMENTS(g_aCPInfo))
1091	{
1092	if (iStart >= 0)
1093	{
1094	Stream1Printf("};\n\n");
1095	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1096	iStart = -1;
1097	}
1098	i = iNonNull;
1099	}
1100	else
1101	{
1102	if (iStart < 0)
1103	{
1104	Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1105	"{\n", i);
1106	iStart = i;
1107	}
1108	CalcFlags(&g_aCPInfo[i], szFlags);
1109	Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1110	i++;
1111	}
1112	}
1113	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1114	"};\n\n\n");
1115	Stream1Printf("\n");
1116	return Stream2Flush();
1117	}
1118
1119
1120	/**
1121	* Prints the upper case tables.
1122	*/
1123	static int PrintUpper(void)
1124	{
1125	Stream2Init();
1126	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1127	"{\n");
1128	RTUNICP i = 0;
1129	int iStart = -1;
1130	while (i < RT_ELEMENTS(g_aCPInfo))
1131	{
1132	/* figure how far off the next chunk is */
1133	unsigned iSameCase = i;
1134	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1135	&& g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1136	&& iSameCase >= 256)
1137	iSameCase++;
1138	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1139	{
1140	if (iStart >= 0)
1141	{
1142	Stream1Printf("};\n\n");
1143	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1144	iStart = -1;
1145	}
1146	i = iSameCase;
1147	}
1148	else
1149	{
1150	if (iStart < 0)
1151	{
1152	Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1153	"{\n", i);
1154	iStart = i;
1155	}
1156	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1157	i++;
1158	}
1159	}
1160	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1161	"};\n\n\n");
1162	Stream1Printf("\n");
1163	return Stream2Flush();
1164	}
1165
1166
1167	/**
1168	* Prints the lowercase tables.
1169	*/
1170	static int PrintLower(void)
1171	{
1172	Stream2Init();
1173	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1174	"{\n");
1175	RTUNICP i = 0;
1176	int iStart = -1;
1177	while (i < RT_ELEMENTS(g_aCPInfo))
1178	{
1179	/* figure how far off the next chunk is */
1180	unsigned iSameCase = i;
1181	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1182	&& g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1183	&& iSameCase >= 256)
1184	iSameCase++;
1185	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1186	{
1187	if (iStart >= 0)
1188	{
1189	Stream1Printf("};\n\n");
1190	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1191	iStart = -1;
1192	}
1193	i = iSameCase;
1194	}
1195	else
1196	{
1197	if (iStart < 0)
1198	{
1199	Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1200	"{\n", i);
1201	iStart = i;
1202	}
1203	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1204	g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1205	i++;
1206	}
1207	}
1208	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1209	"};\n\n\n");
1210	Stream1Printf("\n");
1211	return Stream2Flush();
1212	}
1213
1214
1215	int main(int argc, char **argv)
1216	{
1217	/*
1218	* Parse args.
1219	*/
1220	if (argc <= 1)
1221	{
1222	printf("usage: %s [-C\|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1223	argv[0]);
1224	return 1;
1225	}
1226
1227	const char *pszBaseDir = NULL;
1228	const char *pszUnicodeData = "UnicodeData.txt";
1229	const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1230	const char *pszPropList = "PropList.txt";
1231	const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1232	int iFile = 0;
1233	for (int argi = 1; argi < argc; argi++)
1234	{
1235	if (argv[argi][0] != '-')
1236	{
1237	switch (iFile++)
1238	{
1239	case 0: pszUnicodeData = argv[argi]; break;
1240	case 1: pszDerivedCoreProperties = argv[argi]; break;
1241	case 2: pszPropList = argv[argi]; break;
1242	case 3: pszDerivedNormalizationProps = argv[argi]; break;
1243	default:
1244	fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1245	return 1;
1246	}
1247	}
1248	else if ( !strcmp(argv[argi], "--dir")
1249	\|\| !strcmp(argv[argi], "-C"))
1250	{
1251	if (argi + 1 >= argc)
1252	{
1253	fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1254	return 1;
1255	}
1256	argi++;
1257	pszBaseDir = argv[argi];
1258	}
1259	else
1260	{
1261	fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1262	return 1;
1263	}
1264	}
1265
1266	/*
1267	* Read the data.
1268	*/
1269	int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1270	if (rc)
1271	return rc;
1272	rc = GenerateExcludedData();
1273	if (rc)
1274	return rc;
1275	rc = ReadProperties(pszBaseDir, pszPropList);
1276	if (rc)
1277	return rc;
1278	rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1279	if (rc)
1280	return rc;
1281	rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1282	if (rc)
1283	return rc;
1284
1285	/*
1286	* Produce output files.
1287	*/
1288	rc = Stream1Init("unidata-flags.cpp");
1289	if (!rc)
1290	rc = PrintHeader(argv[0], pszBaseDir);
1291	if (!rc)
1292	rc = PrintFlags();
1293
1294	rc = Stream1Init("unidata-upper.cpp");
1295	if (!rc)
1296	rc = PrintHeader(argv[0], pszBaseDir);
1297	if (!rc)
1298	rc = PrintUpper();
1299
1300	rc = Stream1Init("unidata-lower.cpp");
1301	if (!rc)
1302	rc = PrintHeader(argv[0], pszBaseDir);
1303	if (!rc)
1304	rc = PrintLower();
1305	if (!rc)
1306	rc = Stream1Close();
1307
1308	/* done */
1309	return rc;
1310	}
1311

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 57358

以其他格式下載: