VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 23473

Last change on this file since 23473 was 20374, checked in by vboxsync, 16 years ago

*: s/RT_\(BEGIN|END\)_DECLS/RT_C_DECLS_\1/g

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 9.0 KB
Line 
1/** @file
2 * IPRT - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_uni_h
31#define ___iprt_uni_h
32
33/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
34 * @ingroup grp_rt
35 * @{
36 */
37
38/** @def RTUNI_USE_WCTYPE
39 * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the
40 * data which the C runtime library provides. */
41#ifdef DOXYGEN_RUNNING
42# define RTUNI_USE_WCTYPE
43#endif
44
45#include <iprt/types.h>
46#ifdef RTUNI_USE_WCTYPE
47# include <wctype.h>
48#endif
49
50RT_C_DECLS_BEGIN
51
52
/** The largest value an RTUNICP can hold (all bits set).
 * The range table walkers in this header also compare EndCP against this
 * value to detect the end of a table. */
#define RTUNICP_MAX         (~(RTUNICP)0)

/** Invalid code point.
 * Returned when an invalid encoding or an invalid unicode code point is
 * encountered. */
#define RTUNICP_INVALID     (0xfffffffe)
60
61
62
63#ifndef RTUNI_USE_WCTYPE
64/**
65 * A unicode flags range.
66 * @internal
67 */
68typedef struct RTUNIFLAGSRANGE
69{
70 /** The first code point of the range. */
71 RTUNICP BeginCP;
72 /** The last + 1 code point of the range. */
73 RTUNICP EndCP;
74 /** Pointer to the array of case folded code points. */
75 const uint8_t *pafFlags;
76} RTUNIFLAGSRANGE;
77/** Pointer to a flags range.
78 * @internal */
79typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
80/** Pointer to a const flags range.
81 * @internal */
82typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
83
84/**
85 * A unicode case folded range.
86 * @internal
87 */
88typedef struct RTUNICASERANGE
89{
90 /** The first code point of the range. */
91 RTUNICP BeginCP;
92 /** The last + 1 code point of the range. */
93 RTUNICP EndCP;
94 /** Pointer to the array of case folded code points. */
95 PCRTUNICP paFoldedCPs;
96} RTUNICASERANGE;
97/** Pointer to a case folded range.
98 * @internal */
99typedef RTUNICASERANGE *PRTUNICASERANGE;
100/** Pointer to a const case folded range.
101 * @internal */
102typedef const RTUNICASERANGE *PCRTUNICASERANGE;
103
/** @name Unicode Code Point Flags.
 * Bits of the mask returned by rtUniCpFlags().
 * @internal
 * @{ */
/** Upper case (tested by RTUniCpIsUpper). */
#define RTUNI_UPPER     RT_BIT(0)
/** Lower case (tested by RTUniCpIsLower). */
#define RTUNI_LOWER     RT_BIT(1)
/** Alphabetic (tested by RTUniCpIsAlphabetic). */
#define RTUNI_ALPHA     RT_BIT(2)
/** Hexadecimal digit (tested by RTUniCpIsHexDigit). */
#define RTUNI_XDIGIT    RT_BIT(3)
/** Decimal digit (tested by RTUniCpIsDecDigit). */
#define RTUNI_DDIGIT    RT_BIT(4)
/** White space (tested by RTUniCpIsSpace). */
#define RTUNI_WSPACE    RT_BIT(5)
/*#define RTUNI_BSPACE RT_BIT(6) - later */
/** @} */
115
116
117/**
118 * Array of flags ranges.
119 * @internal
120 */
121extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
122
123/**
124 * Gets the flags for a unicode code point.
125 *
126 * @returns The flag mask. (RTUNI_*)
127 * @param CodePoint The unicode code point.
128 * @internal
129 */
130DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
131{
132 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
133 do
134 {
135 if (pCur->EndCP > CodePoint)
136 {
137 if (pCur->BeginCP <= CodePoint)
138 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
139 break;
140 }
141 pCur++;
142 } while (pCur->EndCP != RTUNICP_MAX);
143 return CodePoint;
144}
145
146
147/**
148 * Checks if a unicode code point is upper case.
149 *
150 * @returns true if it is.
151 * @returns false if it isn't.
152 * @param CodePoint The code point.
153 */
154DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
155{
156 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
157}
158
159
160/**
161 * Checks if a unicode code point is lower case.
162 *
163 * @returns true if it is.
164 * @returns false if it isn't.
165 * @param CodePoint The code point.
166 */
167DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
168{
169 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
170}
171
172
173/**
174 * Checks if a unicode code point is alphabetic.
175 *
176 * @returns true if it is.
177 * @returns false if it isn't.
178 * @param CodePoint The code point.
179 */
180DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
181{
182 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
183}
184
185
186/**
187 * Checks if a unicode code point is a decimal digit.
188 *
189 * @returns true if it is.
190 * @returns false if it isn't.
191 * @param CodePoint The code point.
192 */
193DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
194{
195 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
196}
197
198
199/**
200 * Checks if a unicode code point is a hexadecimal digit.
201 *
202 * @returns true if it is.
203 * @returns false if it isn't.
204 * @param CodePoint The code point.
205 */
206DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
207{
208 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
209}
210
211
212/**
213 * Checks if a unicode code point is white space.
214 *
215 * @returns true if it is.
216 * @returns false if it isn't.
217 * @param CodePoint The code point.
218 */
219DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
220{
221 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
222}
223
224
225
226/**
227 * Array of uppercase ranges.
228 * @internal
229 */
230extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
231
232/**
233 * Array of lowercase ranges.
234 * @internal
235 */
236extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
237
238
239/**
240 * Folds a unicode code point using the specified range array.
241 *
242 * @returns FOlded code point.
243 * @param CodePoint The unicode code point to fold.
244 * @param pCur The case folding range to use.
245 */
246DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
247{
248 do
249 {
250 if (pCur->EndCP > CodePoint)
251 {
252 if (pCur->BeginCP <= CodePoint)
253 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
254 break;
255 }
256 pCur++;
257 } while (pCur->EndCP != RTUNICP_MAX);
258 return CodePoint;
259}
260
261
262/**
263 * Folds a unicode code point to upper case.
264 *
265 * @returns Folded code point.
266 * @param CodePoint The unicode code point to fold.
267 */
268DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
269{
270 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
271}
272
273
274/**
275 * Folds a unicode code point to lower case.
276 *
277 * @returns Folded code point.
278 * @param CodePoint The unicode code point to fold.
279 */
280DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
281{
282 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
283}
284
285
286#else /* RTUNI_USE_WCTYPE */
287
288
289/**
290 * Checks if a unicode code point is upper case.
291 *
292 * @returns true if it is.
293 * @returns false if it isn't.
294 * @param CodePoint The code point.
295 */
296DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
297{
298 return !!iswupper(CodePoint);
299}
300
301
302/**
303 * Checks if a unicode code point is lower case.
304 *
305 * @returns true if it is.
306 * @returns false if it isn't.
307 * @param CodePoint The code point.
308 */
309DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
310{
311 return !!iswlower(CodePoint);
312}
313
314
315/**
316 * Checks if a unicode code point is alphabetic.
317 *
318 * @returns true if it is.
319 * @returns false if it isn't.
320 * @param CodePoint The code point.
321 */
322DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
323{
324 return !!iswalpha(CodePoint);
325}
326
327
328/**
329 * Checks if a unicode code point is a decimal digit.
330 *
331 * @returns true if it is.
332 * @returns false if it isn't.
333 * @param CodePoint The code point.
334 */
335DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
336{
337 return !!iswdigit(CodePoint);
338}
339
340
341/**
342 * Checks if a unicode code point is a hexadecimal digit.
343 *
344 * @returns true if it is.
345 * @returns false if it isn't.
346 * @param CodePoint The code point.
347 */
348DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
349{
350 return !!iswxdigit(CodePoint);
351}
352
353
354/**
355 * Checks if a unicode code point is white space.
356 *
357 * @returns true if it is.
358 * @returns false if it isn't.
359 * @param CodePoint The code point.
360 */
361DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
362{
363 return !!iswspace(CodePoint);
364}
365
366
367/**
368 * Folds a unicode code point to upper case.
369 *
370 * @returns Folded code point.
371 * @param CodePoint The unicode code point to fold.
372 */
373DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
374{
375 return towupper(CodePoint);
376}
377
378
379/**
380 * Folds a unicode code point to lower case.
381 *
382 * @returns Folded code point.
383 * @param CodePoint The unicode code point to fold.
384 */
385DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
386{
387 return towlower(CodePoint);
388}
389
390
391#endif /* RTUNI_USE_WCTYPE */
392
393
394/**
395 * Frees a unicode string.
396 *
397 * @param pusz The string to free.
398 */
399RTDECL(void) RTUniFree(PRTUNICP pusz);
400
401
402RT_C_DECLS_END
403/** @} */
404
405
406#endif
407
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette