VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 5999

Last change on this file since 5999 was 5999, checked in by vboxsync, 17 years ago

The Giant CDDL Dual-License Header Change.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.9 KB
Line 
1/** @file
2 * innotek Portable Runtime - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_uni_h
27#define ___iprt_uni_h
28
29/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
30 * @ingroup grp_rt
31 * @{
32 */
33
34/** @def RTUNI_USE_WCTYPE
35 * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the
36 * data which the C runtime library provides. */
37#ifdef __DOXYGEN__
38# define RTUNI_USE_WCTYPE
39#endif
40
41#include <iprt/types.h>
42#ifdef RTUNI_USE_WCTYPE
43# include <wctype.h>
44#endif
45
46__BEGIN_DECLS
47
48
49/** Max value a RTUNICP type can hold. */
50#define RTUNICP_MAX ( ~(RTUNICP)0 )
51
52/** Invalid code point.
53 * This is returned when an invalid encoding or an invalid
54 * unicode code point is encountered. */
55#define RTUNICP_INVALID ( 0xfffffffe )
56
57
58
59#ifndef RTUNI_USE_WCTYPE
60/**
61 * A unicode flags range: the flags for the code points from BeginCP up to, but not including, EndCP.
62 * @internal
63 */
64typedef struct RTUNIFLAGSRANGE
65{
66 /** The first code point of the range. */
67 RTUNICP BeginCP;
68 /** The last + 1 code point of the range. */
69 RTUNICP EndCP;
70 /** Pointer to the array of flag masks (RTUNI_*), one byte per code point, indexed by code point - BeginCP. */
71 const uint8_t *pafFlags;
72} RTUNIFLAGSRANGE;
73/** Pointer to a flags range.
74 * @internal */
75typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
76/** Pointer to a const flags range.
77 * @internal */
78typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
79
80/**
81 * A unicode case folded range: the folded values for the code points from BeginCP up to, but not including, EndCP.
82 * @internal
83 */
84typedef struct RTUNICASERANGE
85{
86 /** The first code point of the range. */
87 RTUNICP BeginCP;
88 /** The last + 1 code point of the range. */
89 RTUNICP EndCP;
90 /** Pointer to the array of case folded code points, indexed by code point - BeginCP. */
91 PCRTUNICP paFoldedCPs;
92} RTUNICASERANGE;
93/** Pointer to a case folded range.
94 * @internal */
95typedef RTUNICASERANGE *PRTUNICASERANGE;
96/** Pointer to a const case folded range.
97 * @internal */
98typedef const RTUNICASERANGE *PCRTUNICASERANGE;
99
100/** @name Unicode Code Point Flags.
101 * Bits of the flag mask returned by rtUniCpFlags().
102 * @internal
102 * @{ */
103#define RTUNI_UPPER RT_BIT(0) /**< Tested by RTUniCpIsUpper(). */
104#define RTUNI_LOWER RT_BIT(1) /**< Tested by RTUniCpIsLower(). */
105#define RTUNI_ALPHA RT_BIT(2) /**< Tested by RTUniCpIsAlphabetic(). */
106#define RTUNI_XDIGIT RT_BIT(3) /**< Tested by RTUniCpIsHexDigit(). */
107#define RTUNI_DDIGIT RT_BIT(4) /**< Tested by RTUniCpIsDecDigit(). */
108#define RTUNI_WSPACE RT_BIT(5) /**< Tested by RTUniCpIsSpace(). */
109/*#define RTUNI_BSPACE RT_BIT(6) - later */
110/** @} */
111
112
113/**
114 * Array of flags ranges.
115 * @internal
116 */
117extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
118
119/**
120 * Gets the flags for a unicode code point.
121 *
122 * @returns The flag mask. (RTUNI_*)
123 * @param CodePoint The unicode code point.
124 * @internal
125 */
126DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
127{
128 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
129 do
130 {
131 if (pCur->EndCP > CodePoint)
132 {
133 if (pCur->BeginCP <= CodePoint)
134 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
135 break;
136 }
137 pCur++;
138 } while (pCur->EndCP != RTUNICP_MAX);
139 return CodePoint;
140}
141
142
143/**
144 * Checks if a unicode code point is upper case.
145 *
146 * @returns true if it is.
147 * @returns false if it isn't.
148 * @param CodePoint The code point.
149 */
150DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
151{
152 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
153}
154
155
156/**
157 * Checks if a unicode code point is lower case.
158 *
159 * @returns true if it is.
160 * @returns false if it isn't.
161 * @param CodePoint The code point.
162 */
163DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
164{
165 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
166}
167
168
169/**
170 * Checks if a unicode code point is alphabetic.
171 *
172 * @returns true if it is.
173 * @returns false if it isn't.
174 * @param CodePoint The code point.
175 */
176DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
177{
178 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
179}
180
181
182/**
183 * Checks if a unicode code point is a decimal digit.
184 *
185 * @returns true if it is.
186 * @returns false if it isn't.
187 * @param CodePoint The code point.
188 */
189DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
190{
191 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
192}
193
194
195/**
196 * Checks if a unicode code point is a hexadecimal digit.
197 *
198 * @returns true if it is.
199 * @returns false if it isn't.
200 * @param CodePoint The code point.
201 */
202DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
203{
204 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
205}
206
207
208/**
209 * Checks if a unicode code point is white space.
210 *
211 * @returns true if it is.
212 * @returns false if it isn't.
213 * @param CodePoint The code point.
214 */
215DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
216{
217 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
218}
219
220
221
222/**
223 * Array of uppercase ranges.
224 * @internal
225 */
226extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
227
228/**
229 * Array of lowercase ranges.
230 * @internal
231 */
232extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
233
234
235/**
236 * Folds a unicode code point using the specified range array.
237 *
238 * @returns FOlded code point.
239 * @param CodePoint The unicode code point to fold.
240 * @param pCur The case folding range to use.
241 */
242DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
243{
244 do
245 {
246 if (pCur->EndCP > CodePoint)
247 {
248 if (pCur->BeginCP <= CodePoint)
249 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
250 break;
251 }
252 pCur++;
253 } while (pCur->EndCP != RTUNICP_MAX);
254 return CodePoint;
255}
256
257
258/**
259 * Folds a unicode code point to upper case.
260 *
261 * @returns Folded code point.
262 * @param CodePoint The unicode code point to fold.
263 */
264DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
265{
266 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
267}
268
269
270/**
271 * Folds a unicode code point to lower case.
272 *
273 * @returns Folded code point.
274 * @param CodePoint The unicode code point to fold.
275 */
276DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
277{
278 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
279}
280
281
282#else /* RTUNI_USE_WCTYPE */
283
284
285/**
286 * Checks if a unicode code point is upper case.
287 *
288 * @returns true if it is.
289 * @returns false if it isn't.
290 * @param CodePoint The code point.
291 */
292DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
293{
294 return !!iswupper(CodePoint);
295}
296
297
298/**
299 * Checks if a unicode code point is lower case.
300 *
301 * @returns true if it is.
302 * @returns false if it isn't.
303 * @param CodePoint The code point.
304 */
305DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
306{
307 return !!iswlower(CodePoint);
308}
309
310
311/**
312 * Checks if a unicode code point is alphabetic.
313 *
314 * @returns true if it is.
315 * @returns false if it isn't.
316 * @param CodePoint The code point.
317 */
318DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
319{
320 return !!iswalpha(CodePoint);
321}
322
323
324/**
325 * Checks if a unicode code point is a decimal digit.
326 *
327 * @returns true if it is.
328 * @returns false if it isn't.
329 * @param CodePoint The code point.
330 */
331DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
332{
333 return !!iswdigit(CodePoint);
334}
335
336
337/**
338 * Checks if a unicode code point is a hexadecimal digit.
339 *
340 * @returns true if it is.
341 * @returns false if it isn't.
342 * @param CodePoint The code point.
343 */
344DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
345{
346 return !!iswxdigit(CodePoint);
347}
348
349
350/**
351 * Checks if a unicode code point is white space.
352 *
353 * @returns true if it is.
354 * @returns false if it isn't.
355 * @param CodePoint The code point.
356 */
357DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
358{
359 return !!iswspace(CodePoint);
360}
361
362
363/**
364 * Folds a unicode code point to upper case.
365 *
366 * @returns Folded code point.
367 * @param CodePoint The unicode code point to fold.
368 */
369DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
370{
371 return towupper(CodePoint);
372}
373
374
375/**
376 * Folds a unicode code point to lower case.
377 *
378 * @returns Folded code point.
379 * @param CodePoint The unicode code point to fold.
380 */
381DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
382{
383 return towlower(CodePoint);
384}
385
386
387#endif /* RTUNI_USE_WCTYPE */
388
389
390/**
391 * Frees a unicode string.
392 *
393 * @param pusz The string to free.
394 */
395RTDECL(void) RTUniFree(PRTUNICP pusz);
396
397
398__END_DECLS
399/** @} */
400
401
402#endif
403
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette