VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 3810

Last change on this file since 3810 was 3630, checked in by vboxsync, 17 years ago

iprt_hdr_h -> _iprt_hdr_h

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/** @file
2 * innotek Portable Runtime - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef ___iprt_uni_h
22#define ___iprt_uni_h
23
24/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
25 * @ingroup grp_rt
26 * @{
27 */
28
29/** @def RTUNI_USE_WCTYPE
30 * Define RTUNI_USE_WCTYPE to use the character classification and case
31 * conversion provided by the C runtime library (wctype.h) instead of the IPRT unicode data. */
32#ifdef __DOXYGEN__
33# define RTUNI_USE_WCTYPE
34#endif
35
36#include <iprt/types.h>
37#ifdef RTUNI_USE_WCTYPE
38# include <wctype.h>
39#endif
40
41__BEGIN_DECLS
42
43
/** The largest value an RTUNICP can hold (all bits set).
 * The range walkers in this header compare a range's EndCP against this
 * value to detect the end of a range table. */
#define RTUNICP_MAX         (~(RTUNICP)0)
46
/** Invalid code point.
 * Returned when an invalid encoding or an invalid unicode code point is
 * encountered. */
#define RTUNICP_INVALID     (0xfffffffe)
51
52
53
54#ifndef RTUNI_USE_WCTYPE
55/**
56 * A unicode flags range.
57 * @internal
58 */
59typedef struct RTUNIFLAGSRANGE
60{
61 /** The first code point of the range. */
62 RTUNICP BeginCP;
63 /** The last + 1 code point of the range. */
64 RTUNICP EndCP;
65 /** Pointer to the array of case folded code points. */
66 const uint8_t *pafFlags;
67} RTUNIFLAGSRANGE;
68/** Pointer to a flags range.
69 * @internal */
70typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
71/** Pointer to a const flags range.
72 * @internal */
73typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
74
75/**
76 * A unicode case folded range.
77 * @internal
78 */
79typedef struct RTUNICASERANGE
80{
81 /** The first code point of the range. */
82 RTUNICP BeginCP;
83 /** The last + 1 code point of the range. */
84 RTUNICP EndCP;
85 /** Pointer to the array of case folded code points. */
86 PCRTUNICP paFoldedCPs;
87} RTUNICASERANGE;
88/** Pointer to a case folded range.
89 * @internal */
90typedef RTUNICASERANGE *PRTUNICASERANGE;
91/** Pointer to a const case folded range.
92 * @internal */
93typedef const RTUNICASERANGE *PCRTUNICASERANGE;
94
/** @name Unicode Code Point Flags.
 * Bit flags kept per code point in the flags range arrays and handed out by
 * rtUniCpFlags().
 * @internal
 * @{ */
/** Upper case; tested by RTUniCpIsUpper(). */
#define RTUNI_UPPER     BIT(0)
/** Lower case; tested by RTUniCpIsLower(). */
#define RTUNI_LOWER     BIT(1)
/** Alphabetic; tested by RTUniCpIsAlphabetic(). */
#define RTUNI_ALPHA     BIT(2)
/** Hexadecimal digit; tested by RTUniCpIsHexDigit(). */
#define RTUNI_XDIGIT    BIT(3)
/** Decimal digit; tested by RTUniCpIsDecDigit(). */
#define RTUNI_DDIGIT    BIT(4)
/** White space; tested by RTUniCpIsSpace(). */
#define RTUNI_WSPACE    BIT(5)
/*#define RTUNI_BSPACE  BIT(6) - later */
/** @} */
106
107
108/**
109 * Array of flags ranges.
110 * @internal
111 */
112extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
113
114/**
115 * Gets the flags for a unicode code point.
116 *
117 * @returns The flag mask. (RTUNI_*)
118 * @param CodePoint The unicode code point.
119 * @internal
120 */
121DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
122{
123 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
124 do
125 {
126 if (pCur->EndCP > CodePoint)
127 {
128 if (pCur->BeginCP <= CodePoint)
129 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
130 break;
131 }
132 pCur++;
133 } while (pCur->EndCP != RTUNICP_MAX);
134 return CodePoint;
135}
136
137
138/**
139 * Checks if a unicode code point is upper case.
140 *
141 * @returns true if it is.
142 * @returns false if it isn't.
143 * @param CodePoint The code point.
144 */
145DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
146{
147 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
148}
149
150
151/**
152 * Checks if a unicode code point is lower case.
153 *
154 * @returns true if it is.
155 * @returns false if it isn't.
156 * @param CodePoint The code point.
157 */
158DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
159{
160 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
161}
162
163
164/**
165 * Checks if a unicode code point is alphabetic.
166 *
167 * @returns true if it is.
168 * @returns false if it isn't.
169 * @param CodePoint The code point.
170 */
171DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
172{
173 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
174}
175
176
177/**
178 * Checks if a unicode code point is a decimal digit.
179 *
180 * @returns true if it is.
181 * @returns false if it isn't.
182 * @param CodePoint The code point.
183 */
184DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
185{
186 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
187}
188
189
190/**
191 * Checks if a unicode code point is a hexadecimal digit.
192 *
193 * @returns true if it is.
194 * @returns false if it isn't.
195 * @param CodePoint The code point.
196 */
197DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
198{
199 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
200}
201
202
203/**
204 * Checks if a unicode code point is white space.
205 *
206 * @returns true if it is.
207 * @returns false if it isn't.
208 * @param CodePoint The code point.
209 */
210DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
211{
212 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
213}
214
215
216
217/**
218 * Array of uppercase folding ranges; RTUniCpToUpper() hands its first entry to rtUniCpFold().
219 * @internal
220 */
221extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
222
223/**
224 * Array of lowercase folding ranges; RTUniCpToLower() hands its first entry to rtUniCpFold().
225 * @internal
226 */
227extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
228
229
230/**
231 * Folds a unicode code point using the specified range array.
232 *
233 * @returns FOlded code point.
234 * @param CodePoint The unicode code point to fold.
235 * @param pCur The case folding range to use.
236 */
237DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
238{
239 do
240 {
241 if (pCur->EndCP > CodePoint)
242 {
243 if (pCur->BeginCP <= CodePoint)
244 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
245 break;
246 }
247 pCur++;
248 } while (pCur->EndCP != RTUNICP_MAX);
249 return CodePoint;
250}
251
252
253/**
254 * Folds a unicode code point to upper case.
255 *
256 * @returns Folded code point.
257 * @param CodePoint The unicode code point to fold.
258 */
259DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
260{
261 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
262}
263
264
265/**
266 * Folds a unicode code point to lower case.
267 *
268 * @returns Folded code point.
269 * @param CodePoint The unicode code point to fold.
270 */
271DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
272{
273 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
274}
275
276
277#else /* RTUNI_USE_WCTYPE */
278
279
280/**
281 * Checks if a unicode code point is upper case.
282 *
283 * @returns true if it is.
284 * @returns false if it isn't.
285 * @param CodePoint The code point.
286 */
287DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
288{
289 return !!iswupper(CodePoint);
290}
291
292
293/**
294 * Checks if a unicode code point is lower case.
295 *
296 * @returns true if it is.
297 * @returns false if it isn't.
298 * @param CodePoint The code point.
299 */
300DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
301{
302 return !!iswlower(CodePoint);
303}
304
305
306/**
307 * Checks if a unicode code point is alphabetic.
308 *
309 * @returns true if it is.
310 * @returns false if it isn't.
311 * @param CodePoint The code point.
312 */
313DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
314{
315 return !!iswalpha(CodePoint);
316}
317
318
319/**
320 * Checks if a unicode code point is a decimal digit.
321 *
322 * @returns true if it is.
323 * @returns false if it isn't.
324 * @param CodePoint The code point.
325 */
326DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
327{
328 return !!iswdigit(CodePoint);
329}
330
331
332/**
333 * Checks if a unicode code point is a hexadecimal digit.
334 *
335 * @returns true if it is.
336 * @returns false if it isn't.
337 * @param CodePoint The code point.
338 */
339DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
340{
341 return !!iswxdigit(CodePoint);
342}
343
344
345/**
346 * Checks if a unicode code point is white space.
347 *
348 * @returns true if it is.
349 * @returns false if it isn't.
350 * @param CodePoint The code point.
351 */
352DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
353{
354 return !!iswspace(CodePoint);
355}
356
357
358/**
359 * Folds a unicode code point to upper case.
360 *
361 * @returns Folded code point.
362 * @param CodePoint The unicode code point to fold.
363 */
364DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
365{
366 return towupper(CodePoint);
367}
368
369
370/**
371 * Folds a unicode code point to lower case.
372 *
373 * @returns Folded code point.
374 * @param CodePoint The unicode code point to fold.
375 */
376DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
377{
378 return towlower(CodePoint);
379}
380
381
382#endif /* RTUNI_USE_WCTYPE */
383
384
385/**
386 * Frees a unicode string (only the declaration is visible in this header).
387 *
388 * @param pusz The string to free. NOTE(review): which allocator it must come from and whether NULL is accepted are not visible here - confirm against the implementation.
389 */
390RTDECL(void) RTUniFree(PRTUNICP pusz);
391
392
393__END_DECLS
394/** @} */
395
396
397#endif
398
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette