VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 3251

Last change on this file since 3251 was 2981, checked in by vboxsync, 17 years ago

InnoTek -> innotek: all the headers and comments.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/** @file
2 *
3 * innotek Portable Runtime - Unicode Code Points.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22#ifndef __iprt_uni_h__
23#define __iprt_uni_h__
24
25/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
26 * @ingroup grp_rt
27 * @{
28 */
29
30/** @def RTUNI_USE_WCTYPE
31 * Define RTUNI_USE_WCTYPE to use the character data provided by the C
32 * runtime library (wctype.h) instead of the IPRT unicode data. */
33#ifdef __DOXYGEN__
34# define RTUNI_USE_WCTYPE
35#endif
36
37#include <iprt/types.h>
38#ifdef RTUNI_USE_WCTYPE
39# include <wctype.h>
40#endif
41
42__BEGIN_DECLS
43
44
45/** Max value a RTUNICP type can hold (the bitwise complement of zero in
 * that type).  The range table walkers in this header (rtUniCpFlags() and
 * rtUniCpFold()) also stop when they reach an entry whose EndCP equals it. */
46#define RTUNICP_MAX ( ~(RTUNICP)0 )
47
48/** Invalid code point.
49 * This is returned when an invalid encoding or an invalid
50 * unicode code point is encountered. */
51#define RTUNICP_INVALID ( 0xfffffffe )
52
53
54
55#ifndef RTUNI_USE_WCTYPE
56/**
57 * A unicode flags range.
58 * @internal
59 */
60typedef struct RTUNIFLAGSRANGE
61{
62 /** The first code point of the range. */
63 RTUNICP BeginCP;
64 /** The last + 1 code point of the range. */
65 RTUNICP EndCP;
66 /** Pointer to the array of case folded code points. */
67 const uint8_t *pafFlags;
68} RTUNIFLAGSRANGE;
69/** Pointer to a flags range.
70 * @internal */
71typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
72/** Pointer to a const flags range.
73 * @internal */
74typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
75
76/**
77 * A unicode case folded range.
78 * @internal
79 */
80typedef struct RTUNICASERANGE
81{
82 /** The first code point of the range. */
83 RTUNICP BeginCP;
84 /** The last + 1 code point of the range. */
85 RTUNICP EndCP;
86 /** Pointer to the array of case folded code points. */
87 PCRTUNICP paFoldedCPs;
88} RTUNICASERANGE;
89/** Pointer to a case folded range.
90 * @internal */
91typedef RTUNICASERANGE *PRTUNICASERANGE;
92/** Pointer to a const case folded range.
93 * @internal */
94typedef const RTUNICASERANGE *PCRTUNICASERANGE;
95
96/** @name Unicode Code Point Flags.
97 * Bits of the mask returned by rtUniCpFlags().
 * @internal
98 * @{ */
99#define RTUNI_UPPER BIT(0) /**< Tested by RTUniCpIsUpper(). */
100#define RTUNI_LOWER BIT(1) /**< Tested by RTUniCpIsLower(). */
101#define RTUNI_ALPHA BIT(2) /**< Tested by RTUniCpIsAlphabetic(). */
102#define RTUNI_XDIGIT BIT(3) /**< Tested by RTUniCpIsHexDigit(). */
103#define RTUNI_DDIGIT BIT(4) /**< Tested by RTUniCpIsDecDigit(). */
104#define RTUNI_WSPACE BIT(5) /**< Tested by RTUniCpIsSpace(). */
105/*#define RTUNI_BSPACE BIT(6) - later */
106/** @} */
107
108
109/**
110 * Array of flags ranges.
 *
 * rtUniCpFlags() walks this table from element 0; the walk ends when it
 * reaches an entry whose EndCP equals RTUNICP_MAX.  The definition lives
 * outside this header.
111 * @internal
112 */
113extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
114
115/**
116 * Gets the flags for a unicode code point.
117 *
118 * @returns The flag mask. (RTUNI_*)
119 * @param CodePoint The unicode code point.
120 * @internal
121 */
122DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
123{
124 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
125 do
126 {
127 if (pCur->EndCP > CodePoint)
128 {
129 if (pCur->BeginCP <= CodePoint)
130 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
131 break;
132 }
133 pCur++;
134 } while (pCur->EndCP != RTUNICP_MAX);
135 return CodePoint;
136}
137
138
139/**
140 * Checks if a unicode code point is upper case.
141 *
142 * @returns true if it is.
143 * @returns false if it isn't.
144 * @param CodePoint The code point.
145 */
146DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
147{
148 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
149}
150
151
152/**
153 * Checks if a unicode code point is lower case.
154 *
155 * @returns true if it is.
156 * @returns false if it isn't.
157 * @param CodePoint The code point.
158 */
159DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
160{
161 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
162}
163
164
165/**
166 * Checks if a unicode code point is alphabetic.
167 *
168 * @returns true if it is.
169 * @returns false if it isn't.
170 * @param CodePoint The code point.
171 */
172DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
173{
174 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
175}
176
177
178/**
179 * Checks if a unicode code point is a decimal digit.
180 *
181 * @returns true if it is.
182 * @returns false if it isn't.
183 * @param CodePoint The code point.
184 */
185DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
186{
187 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
188}
189
190
191/**
192 * Checks if a unicode code point is a hexadecimal digit.
193 *
194 * @returns true if it is.
195 * @returns false if it isn't.
196 * @param CodePoint The code point.
197 */
198DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
199{
200 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
201}
202
203
204/**
205 * Checks if a unicode code point is white space.
206 *
207 * @returns true if it is.
208 * @returns false if it isn't.
209 * @param CodePoint The code point.
210 */
211DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
212{
213 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
214}
215
216
217
218/**
219 * Array of uppercase ranges.
 *
 * RTUniCpToUpper() hands the first element of this table to rtUniCpFold().
 * The definition lives outside this header.
220 * @internal
221 */
222extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
223
224/**
225 * Array of lowercase ranges.
 *
 * RTUniCpToLower() hands the first element of this table to rtUniCpFold().
 * The definition lives outside this header.
226 * @internal
227 */
228extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
229
230
231/**
232 * Folds a unicode code point using the specified range array.
233 *
234 * @returns FOlded code point.
235 * @param CodePoint The unicode code point to fold.
236 * @param pCur The case folding range to use.
237 */
238DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
239{
240 do
241 {
242 if (pCur->EndCP > CodePoint)
243 {
244 if (pCur->BeginCP <= CodePoint)
245 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
246 break;
247 }
248 pCur++;
249 } while (pCur->EndCP != RTUNICP_MAX);
250 return CodePoint;
251}
252
253
254/**
255 * Folds a unicode code point to upper case.
256 *
257 * @returns Folded code point.
258 * @param CodePoint The unicode code point to fold.
259 */
260DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
261{
262 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
263}
264
265
266/**
267 * Folds a unicode code point to lower case.
268 *
269 * @returns Folded code point.
270 * @param CodePoint The unicode code point to fold.
271 */
272DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
273{
274 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
275}
276
277
278#else /* RTUNI_USE_WCTYPE */
279
280
281/**
282 * Checks if a unicode code point is upper case.
283 *
284 * @returns true if it is.
285 * @returns false if it isn't.
286 * @param CodePoint The code point.
287 */
288DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
289{
290 return !!iswupper(CodePoint);
291}
292
293
294/**
295 * Checks if a unicode code point is lower case.
296 *
297 * @returns true if it is.
298 * @returns false if it isn't.
299 * @param CodePoint The code point.
300 */
301DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
302{
303 return !!iswlower(CodePoint);
304}
305
306
307/**
308 * Checks if a unicode code point is alphabetic.
309 *
310 * @returns true if it is.
311 * @returns false if it isn't.
312 * @param CodePoint The code point.
313 */
314DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
315{
316 return !!iswalpha(CodePoint);
317}
318
319
320/**
321 * Checks if a unicode code point is a decimal digit.
322 *
323 * @returns true if it is.
324 * @returns false if it isn't.
325 * @param CodePoint The code point.
326 */
327DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
328{
329 return !!iswdigit(CodePoint);
330}
331
332
333/**
334 * Checks if a unicode code point is a hexadecimal digit.
335 *
336 * @returns true if it is.
337 * @returns false if it isn't.
338 * @param CodePoint The code point.
339 */
340DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
341{
342 return !!iswxdigit(CodePoint);
343}
344
345
346/**
347 * Checks if a unicode code point is white space.
348 *
349 * @returns true if it is.
350 * @returns false if it isn't.
351 * @param CodePoint The code point.
352 */
353DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
354{
355 return !!iswspace(CodePoint);
356}
357
358
359/**
360 * Folds a unicode code point to upper case.
361 *
362 * @returns Folded code point.
363 * @param CodePoint The unicode code point to fold.
364 */
365DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
366{
367 return towupper(CodePoint);
368}
369
370
371/**
372 * Folds a unicode code point to lower case.
373 *
374 * @returns Folded code point.
375 * @param CodePoint The unicode code point to fold.
376 */
377DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
378{
379 return towlower(CodePoint);
380}
381
382
383#endif /* RTUNI_USE_WCTYPE */
384
385
386/**
387 * Frees a unicode string.
 *
 * Declared here only; the implementation is not part of this header.
 * NOTE(review): which allocator must have produced the string, and whether
 * NULL is accepted, cannot be told from this header - confirm against the
 * implementation.
388 *
389 * @param pusz The string to free.
390 */
391RTDECL(void) RTUniFree(PRTUNICP pusz);
392
393
394__END_DECLS
395/** @} */
396
397
398#endif
399
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette