VirtualBox

source: vbox/trunk/include/iprt/string.h@ 3624

Last change on this file since 3624 was 3624, checked in by vboxsync, 17 years ago

FreeBSD kernel.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 49.8 KB
Line 
1/** @file
2 * innotek Portable Runtime - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21
22#ifndef __iprt_string_h__
23#define __iprt_string_h__
24
25#include <iprt/cdefs.h>
26#include <iprt/types.h>
27#include <iprt/stdarg.h>
28#include <iprt/err.h> /* for VINF_SUCCESS */
29#if defined(__LINUX__) && defined(__KERNEL__)
30# include <linux/string.h>
31#elif defined(__FREEBSD__) && defined(_KERNEL)
32 /*
33 * Kludge for the FreeBSD kernel:
34 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
35 * it up while including string.h to keep things quiet. It's nothing
36 * important that's clashing, after all.
37 */
38# define strdup strdup_string_h
39# include <string.h>
40# undef strdup
41#else
42# include <string.h>
43#endif
44
45/*
46 * Supply prototypes for standard string functions provided by
47 * IPRT instead of the operating environment.
48 */
49#if defined(__DARWIN__) && defined(KERNEL)
50__BEGIN_DECLS
51void *memchr(const void *pv, int ch, size_t cb);
52char *strpbrk(const char *pszStr, const char *pszChars);
53__END_DECLS
54#endif
55
56
57/** @defgroup grp_rt_str RTStr - String Manipulation
58 * Mostly UTF-8 related helpers where the standard string functions won't do.
59 * @ingroup grp_rt
60 * @{
61 */
62
63__BEGIN_DECLS
64
65
/**
 * The maximum string length.
 * Pass this as a length argument to process a string in its entirety.
 */
#define RTSTR_MAX ((size_t)-1)
70
71
72#ifdef IN_RING3
73
74/**
75 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
76 *
77 * @returns iprt status code.
78 * @param ppszString Receives pointer of allocated native CP string.
79 * The returned pointer must be freed using RTStrFree().
80 * @param pszString UTF-8 string to convert.
81 */
82RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
83
84/**
85 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
86 *
87 * @returns iprt status code.
88 * @param ppszString Receives pointer of allocated UTF-8 string.
89 * The returned pointer must be freed using RTStrFree().
90 * @param pszString Native string to convert.
91 */
92RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
93
94#endif
95
96/**
97 * Free string allocated by any of the non-UCS-2 string functions.
98 *
99 * @returns Nothing; the function has no return value.
100 * @param pszString Pointer to buffer with string to free.
101 * NULL is accepted.
102 */
103RTDECL(void) RTStrFree(char *pszString);
104
105/**
106 * Allocates a new copy of the given UTF-8 string.
107 *
108 * @returns Pointer to the allocated UTF-8 string.
109 * @param pszString UTF-8 string to duplicate.
110 */
111RTDECL(char *) RTStrDup(const char *pszString);
112
113/**
114 * Allocates a new copy of the given UTF-8 string.
115 *
116 * @returns iprt status code.
117 * @param ppszString Receives pointer of the allocated UTF-8 string.
118 * The returned pointer must be freed using RTStrFree().
119 * @param pszString UTF-8 string to duplicate.
120 */
121RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
122
123/**
124 * Gets the number of code points the string is made up of, excluding
125 * the terminator.
126 *
127 *
128 * @returns Number of code points (RTUNICP).
129 * @returns 0 if the string was incorrectly encoded.
130 * @param psz The string.
131 */
132RTDECL(size_t) RTStrUniLen(const char *psz);
133
134/**
135 * Gets the number of code points the string is made up of, excluding
136 * the terminator.
137 *
138 * This function will validate the string, and incorrectly encoded UTF-8
139 * strings will be rejected.
140 *
141 * @returns iprt status code.
142 * @param psz The string.
143 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
144 * @param pcuc Where to store the code point count.
145 * This is undefined on failure.
146 */
147RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
148
149/**
150 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
151 *
152 * @returns iprt status code.
153 * @param pszString UTF-8 string to convert.
154 * @param ppUniString Receives pointer to the allocated unicode string.
155 * The returned string must be freed using RTUniFree().
156 */
157RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
158
159/**
160 * Translates pszString from UTF-8 to an array of code points, allocating the result
161 * array if requested.
162 *
163 * @returns iprt status code.
164 * @param pszString UTF-8 string to convert.
165 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
166 * when it reaches cchString or the string terminator ('\\0').
167 * Use RTSTR_MAX to translate the entire string.
168 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
169 * a buffer of the specified size, or pointer to a NULL pointer.
170 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
171 * will be allocated to hold the translated string.
172 * If a buffer was requested it must be freed using RTUniFree().
173 * @param cCps The number of code points in the unicode string. This includes the terminator.
174 * @param pcCps Where to store the length of the translated string. (Optional)
175 * This field will be updated even on failure, however the value is only
176 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
177 * and VERR_NO_STR_MEMORY it contains the required buffer space.
178 */
179RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
180
181/**
182 * Calculates the length of the string in RTUTF16 items.
183 *
184 * This function will validate the string, and incorrectly encoded UTF-8
185 * strings will be rejected. The primary purpose of this function is to
186 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
187 * other purposes RTStrCalcUtf16LenEx() should be used.
188 *
189 * @returns Number of RTUTF16 items.
190 * @returns 0 if the string was incorrectly encoded.
191 * @param psz The string.
192 */
193RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
194
195/**
196 * Calculates the length of the string in RTUTF16 items.
197 *
198 * This function will validate the string, and incorrectly encoded UTF-8
199 * strings will be rejected.
200 *
201 * @returns iprt status code.
202 * @param psz The string.
203 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
204 * @param pcwc Where to store the string length. Optional.
205 * This is undefined on failure.
206 */
207RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
208
209/**
210 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
211 *
212 * @returns iprt status code.
213 * @param pszString UTF-8 string to convert.
214 * @param ppwszString Receives pointer to the allocated UTF-16 string.
215 * The returned string must be freed using RTUtf16Free().
216 */
217RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
218
219/**
220 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
221 *
222 * @returns iprt status code.
223 * @param pszString UTF-8 string to convert.
224 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
225 * when it reaches cchString or the string terminator ('\\0').
226 * Use RTSTR_MAX to translate the entire string.
227 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
228 * a buffer of the specified size, or pointer to a NULL pointer.
229 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
230 * will be allocated to hold the translated string.
231 * If a buffer was requested it must be freed using RTUtf16Free().
232 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
233 * @param pcwc Where to store the length of the translated string. (Optional)
234 * This field will be updated even on failure, however the value is only
235 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
236 * and VERR_NO_STR_MEMORY it contains the required buffer space.
237 */
238RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
239
240/**
241 * Allocates tmp buffer, translates pszString from UTF8 to UCS-2.
242 *
243 * @returns iprt status code.
244 * @param ppwszString Receives pointer of allocated UCS-2 string.
245 * The returned pointer must be freed using RTStrUcs2Free().
246 * @param pszString UTF-8 string to convert.
247 * @deprecated Use RTStrToUtf16().
248 */
249DECLINLINE(int) RTStrUtf8ToUcs2(PRTUCS2 *ppwszString, const char *pszString)
250{
251 return RTStrToUtf16(pszString, ppwszString);
252}
253
254/**
255 * Translates pszString from UTF8 to backwater UCS-2, can allocate a temp buffer.
256 *
257 * @returns iprt status code.
258 * @param ppwszString Receives pointer of allocated UCS-2 string.
259 * The returned pointer must be freed using RTStrUcs2Free().
260 * @param cwc Length of target buffer in RTUCS2s including the trailing '\\0'.
261 * If 0 a temporary buffer is allocated.
262 * @param pszString UTF-8 string to convert.
263 * @deprecated Use RTStrToUtf16Ex().
264 */
265DECLINLINE(int) RTStrUtf8ToUcs2Ex(PRTUCS2 *ppwszString, unsigned cwc, const char *pszString)
266{
267 return RTStrToUtf16Ex(pszString, RTSTR_MAX, ppwszString, cwc, NULL);
268}
269
270
271/**
272 * Get the unicode code point at the given string position.
273 *
274 * @returns unicode code point.
275 * @returns RTUNICP_INVALID if the encoding is invalid.
276 * @param psz The string.
277 */
278RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
279
280/**
281 * Get the unicode code point at the given string position.
282 *
283 * @returns iprt status code.
284 * RTUNICP_INVALID is stored in *pCp if the encoding is invalid.
285 * @param ppsz The string.
286 * @param pCp Where to store the unicode code point.
287 */
288RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
289
290/**
291 * Put the unicode code point at the given string position
292 * and return the pointer to the char following it.
293 *
294 * This function will not consider anything at or following the
295 * buffer area pointed to by psz. It is therefore not suitable for
296 * inserting code points into a string, only appending/overwriting.
297 *
298 * @returns pointer to the char following the written code point.
299 * @param psz The string.
300 * @param CodePoint The code point to write.
301 * This should not be RTUNICP_INVALID or any other character
302 * out of the UTF-8 range.
303 *
304 * @remark This is a worker function for RTStrPutCp().
305 *
306 */
307RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
308
309/**
310 * Get the unicode code point at the given string position.
311 *
312 * @returns unicode code point.
313 * @returns RTUNICP_INVALID if the encoding is invalid.
314 * @param psz The string.
315 *
316 * @remark We optimize this operation by using an inline function for
317 * the most frequent and simplest sequence, the rest is
318 * handled by RTStrGetCpInternal().
319 */
320DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
321{
322 const unsigned char uch = *(const unsigned char *)psz;
323 if (!(uch & BIT(7)))
324 return uch;
325 return RTStrGetCpInternal(psz);
326}
327
328/**
329 * Get the unicode code point at the given string position.
330 *
331 * @returns iprt status code.
332 * @param ppsz Pointer to the string pointer. This will be updated to
333 * point to the char following the current code point.
334 * @param pCp Where to store the code point.
335 * RTUNICP_INVALID is stored here on failure.
336 *
337 * @remark We optimize this operation by using an inline function for
338 * the most frequent and simplest sequence, the rest is
339 * handled by RTStrGetCpExInternal().
340 */
341DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
342{
343 const unsigned char uch = **(const unsigned char **)ppsz;
344 if (!(uch & BIT(7)))
345 {
346 (*ppsz)++;
347 *pCp = uch;
348 return VINF_SUCCESS;
349 }
350 return RTStrGetCpExInternal(ppsz, pCp);
351}
352
353/**
354 * Put the unicode code point at the given string position
355 * and return the pointer to the char following it.
356 *
357 * This function will not consider anything at or following the the
358 * buffer area pointed to by psz. It is therefore not suitable for
359 * inserting code points into a string, only appending/overwriting.
360 *
361 * @returns pointer to the char following the written code point.
362 * @param psz The string.
363 * @param CodePoint The code point to write.
364 * This sould not be RTUNICP_INVALID or any other charater
365 * out of the UTF-8 range.
366 *
367 * @remark We optimize this operation by using an inline function for
368 * the most frequent and simplest sequence, the rest is
369 * handled by RTStrPutCpInternal().
370 */
371DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
372{
373 if (CodePoint < 0x80)
374 {
375 *psz++ = (unsigned char)CodePoint;
376 return psz;
377 }
378 return RTStrPutCpInternal(psz, CodePoint);
379}
380
381/**
382 * Skips ahead, past the current code point.
383 *
384 * @returns Pointer to the char after the current code point.
385 * @param psz Pointer to the current code point.
386 * @remark This will not move the next valid code point, only past the current one.
387 */
388DECLINLINE(char *) RTStrNextCp(const char *psz)
389{
390 RTUNICP Cp;
391 RTStrGetCpEx(&psz, &Cp);
392 return (char *)psz;
393}
394
395/**
396 * Skips back to the previous code point.
397 *
398 * @returns Pointer to the char before the current code point.
399 * @returns pszStart on failure.
400 * @param pszStart Pointer to the start of the string.
401 * @param psz Pointer to the current code point.
402 */
403RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
404
405
406
407#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
408#define DECLARED_FNRTSTROUTPUT
409/**
410 * Output callback.
411 *
412 * @returns number of bytes written.
413 * @param pvArg User argument.
414 * @param pachChars Pointer to an array of utf-8 characters.
415 * @param cbChars Number of bytes in the character array pointed to by pachChars.
416 */
417typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
418/** Pointer to callback function. */
419typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
420#endif
421
/** Format flag.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/* Mask covering all of the RTSTR_F_*BIT size flags below. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag (one of the RTSTR_F_*BIT values) for the specified type.
 *
 * sizeof() yields the size in bytes, so it is scaled by 8 before being
 * compared against the bit widths the flags are named after.
 * Evaluates to 0 when the width of the type matches none of the flags.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
454
455
456/**
457 * Callback to format non-standard format specifiers.
458 *
459 * @returns The number of bytes formatted.
460 * @param pvArg Formatter argument.
461 * @param pfnOutput Pointer to output function.
462 * @param pvArgOutput Argument for the output function.
463 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
464 * after the format specifier.
465 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
466 * @param cchWidth Format Width. -1 if not specified.
467 * @param cchPrecision Format Precision. -1 if not specified.
468 * @param fFlags Flags (RTSTR_F_*).
469 * @param chArgSize The argument size specifier, 'l' or 'L'.
470 */
471typedef DECLCALLBACK(int) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
472 const char **ppszFormat, va_list *pArgs, int cchWidth, int cchPrecision, unsigned fFlags, char chArgSize);
473/** Pointer to a FNSTRFORMAT() function. */
474typedef FNSTRFORMAT *PFNSTRFORMAT;
475
476
477/**
478 * Partial implementation of a printf like formatter.
479 * It doesn't do everything correctly, and there is no floating point support.
480 * However, it supports custom formats by the means of a format callback.
481 *
482 * @returns number of bytes formatted.
483 * @param pfnOutput Output worker.
484 * Called in two ways. Normally with a string and its length.
485 * For termination, it's called with NULL for string, 0 for length.
486 * @param pvArgOutput Argument to the output worker.
487 * @param pfnFormat Custom format worker.
488 * @param pvArgFormat Argument to the format worker.
489 * @param pszFormat Format string pointer.
490 * @param args Argument list.
491 */
492RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list args);
493
494/**
495 * Partial implementation of a printf like formatter.
496 * It doesn't do everything correctly, and there is no floating point support.
497 * However, it supports custom formats by the means of a format callback.
498 *
499 * @returns number of bytes formatted.
500 * @param pfnOutput Output worker.
501 * Called in two ways. Normally with a string and its length.
502 * For termination, it's called with NULL for string, 0 for length.
503 * @param pvArgOutput Argument to the output worker.
504 * @param pfnFormat Custom format worker.
505 * @param pvArgFormat Argument to the format worker.
506 * @param pszFormat Format string.
507 * @param ... Argument list.
508 */
509RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
510
511/**
512 * Formats an integer number according to the parameters.
513 *
514 * @returns Length of the formatted number.
515 * @param psz Pointer to output string buffer of sufficient size.
516 * @param u64Value Value to format.
517 * @param uiBase Number representation base.
518 * @param cchWidth Width.
519 * @param cchPrecision Precision.
520 * @param fFlags Flags (RTSTR_F_*).
521 */
522RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
523
524/**
525 * String printf.
526 *
527 * @returns The length of the returned string (in pszBuffer).
528 * @param pszBuffer Output buffer.
529 * @param cchBuffer Size of the output buffer.
530 * @param pszFormat The format string.
531 * @param args The format argument.
532 */
533RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
534
535/**
536 * String printf.
537 *
538 * @returns The length of the returned string (in pszBuffer).
539 * @param pszBuffer Output buffer.
540 * @param cchBuffer Size of the output buffer.
541 * @param pszFormat The format string.
542 * @param ... The format argument.
543 */
544RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
545
546
547/**
548 * String printf with custom formatting.
549 *
550 * @returns The length of the returned string (in pszBuffer).
551 * @param pfnFormat Pointer to handler function for the custom formats.
552 * @param pvArg Argument to the pfnFormat function.
553 * @param pszBuffer Output buffer.
554 * @param cchBuffer Size of the output buffer.
555 * @param pszFormat The format string.
556 * @param args The format argument.
557 */
558RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
559
560/**
561 * String printf with custom formatting.
562 *
563 * @returns The length of the returned string (in pszBuffer).
564 * @param pfnFormat Pointer to handler function for the custom formats.
565 * @param pvArg Argument to the pfnFormat function.
566 * @param pszBuffer Output buffer.
567 * @param cchBuffer Size of the output buffer.
568 * @param pszFormat The format string.
569 * @param ... The format argument.
570 */
571RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
572
573
574/**
575 * Allocating string printf.
576 *
577 * @returns The length of the string in the returned *ppszBuffer.
578 * @returns -1 on failure.
579 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
580 * The buffer should be freed using RTStrFree().
581 * On failure *ppszBuffer will be set to NULL.
582 * @param pszFormat The format string.
583 * @param args The format argument.
584 */
585RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
586
587/**
588 * Allocating string printf.
589 *
590 * @returns The length of the string in the returned *ppszBuffer.
591 * @returns -1 on failure.
592 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
593 * The buffer should be freed using RTStrFree().
594 * On failure *ppszBuffer will be set to NULL.
595 * @param pszFormat The format string.
596 * @param ... The format argument.
597 */
598RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
599
600
601/**
602 * Strips blankspaces from both ends of the string.
603 *
604 * @returns Pointer to first non-blank char in the string.
605 * @param psz The string to strip.
606 */
607RTDECL(char *) RTStrStrip(char *psz);
608
609/**
610 * Strips blankspaces from the start of the string.
611 *
612 * @returns Pointer to first non-blank char in the string.
613 * @param psz The string to strip.
614 */
615RTDECL(char *) RTStrStripL(const char *psz);
616
617/**
618 * Strips blankspaces from the end of the string.
619 *
620 * @returns psz.
621 * @param psz The string to strip.
622 */
623RTDECL(char *) RTStrStripR(char *psz);
624
625
626/** @defgroup rt_str_conv String To/From Number Conversions
627 * @ingroup grp_rt_str
628 * @{ */
629
630/**
631 * Converts a string representation of a number to a 64-bit unsigned number.
632 *
633 * @returns iprt status code.
634 * Warnings are used to indicate conversion problems.
635 * @param pszValue Pointer to the string value.
636 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
637 * @param uBase The base of the representation used.
638 * If 0 the function will look for known prefixes before defaulting to 10.
639 * @param pu64 Where to store the converted number. (optional)
640 */
641RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
642
643/**
644 * Converts a string representation of a number to a 64-bit unsigned number.
645 * The base is guessed.
646 *
647 * @returns 64-bit unsigned number on success.
648 * @returns 0 on failure.
649 * @param pszValue Pointer to the string value.
650 */
651RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
652
653/**
654 * Converts a string representation of a number to a 32-bit unsigned number.
655 *
656 * @returns iprt status code.
657 * Warnings are used to indicate conversion problems.
658 * @param pszValue Pointer to the string value.
659 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
660 * @param uBase The base of the representation used.
661 * If 0 the function will look for known prefixes before defaulting to 10.
662 * @param pu32 Where to store the converted number. (optional)
663 */
664RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
665
666/**
667 * Converts a string representation of a number to a 32-bit unsigned number.
668 * The base is guessed.
669 *
670 * @returns 32-bit unsigned number on success.
671 * @returns 0 on failure.
672 * @param pszValue Pointer to the string value.
673 */
674RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
675
676/**
677 * Converts a string representation of a number to a 16-bit unsigned number.
678 *
679 * @returns iprt status code.
680 * Warnings are used to indicate conversion problems.
681 * @param pszValue Pointer to the string value.
682 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
683 * @param uBase The base of the representation used.
684 * If 0 the function will look for known prefixes before defaulting to 10.
685 * @param pu16 Where to store the converted number. (optional)
686 */
687RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
688
689/**
690 * Converts a string representation of a number to a 16-bit unsigned number.
691 * The base is guessed.
692 *
693 * @returns 16-bit unsigned number on success.
694 * @returns 0 on failure.
695 * @param pszValue Pointer to the string value.
696 */
697RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
698
699/**
700 * Converts a string representation of a number to a 8-bit unsigned number.
701 *
702 * @returns iprt status code.
703 * Warnings are used to indicate conversion problems.
704 * @param pszValue Pointer to the string value.
705 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
706 * @param uBase The base of the representation used.
707 * If 0 the function will look for known prefixes before defaulting to 10.
708 * @param pu8 Where to store the converted number. (optional)
709 */
710RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
711
712/**
713 * Converts a string representation of a number to a 8-bit unsigned number.
714 * The base is guessed.
715 *
716 * @returns 8-bit unsigned number on success.
717 * @returns 0 on failure.
718 * @param pszValue Pointer to the string value.
719 */
720RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
721
722/**
723 * Converts a string representation of a number to a 64-bit signed number.
724 *
725 * @returns iprt status code.
726 * Warnings are used to indicate conversion problems.
727 * @param pszValue Pointer to the string value.
728 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
729 * @param uBase The base of the representation used.
730 * If 0 the function will look for known prefixes before defaulting to 10.
731 * @param pi64 Where to store the converted number. (optional)
732 */
733RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
734
735/**
736 * Converts a string representation of a number to a 64-bit signed number.
737 * The base is guessed.
738 *
739 * @returns 64-bit signed number on success.
740 * @returns 0 on failure.
741 * @param pszValue Pointer to the string value.
742 */
743RTDECL(int64_t) RTStrToInt64(const char *pszValue);
744
745/**
746 * Converts a string representation of a number to a 32-bit signed number.
747 *
748 * @returns iprt status code.
749 * Warnings are used to indicate conversion problems.
750 * @param pszValue Pointer to the string value.
751 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
752 * @param uBase The base of the representation used.
753 * If 0 the function will look for known prefixes before defaulting to 10.
754 * @param pi32 Where to store the converted number. (optional)
755 */
756RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
757
758/**
759 * Converts a string representation of a number to a 32-bit signed number.
760 * The base is guessed.
761 *
762 * @returns 32-bit signed number on success.
763 * @returns 0 on failure.
764 * @param pszValue Pointer to the string value.
765 */
766RTDECL(int32_t) RTStrToInt32(const char *pszValue);
767
768/**
769 * Converts a string representation of a number to a 16-bit signed number.
770 *
771 * @returns iprt status code.
772 * Warnings are used to indicate conversion problems.
773 * @param pszValue Pointer to the string value.
774 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
775 * @param uBase The base of the representation used.
776 * If 0 the function will look for known prefixes before defaulting to 10.
777 * @param pi16 Where to store the converted number. (optional)
778 */
779RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
780
781/**
782 * Converts a string representation of a number to a 16-bit signed number.
783 * The base is guessed.
784 *
785 * @returns 16-bit signed number on success.
786 * @returns 0 on failure.
787 * @param pszValue Pointer to the string value.
788 */
789RTDECL(int16_t) RTStrToInt16(const char *pszValue);
790
791/**
792 * Converts a string representation of a number to a 8-bit signed number.
793 *
794 * @returns iprt status code.
795 * Warnings are used to indicate conversion problems.
796 * @param pszValue Pointer to the string value.
797 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
798 * @param uBase The base of the representation used.
799 * If 0 the function will look for known prefixes before defaulting to 10.
800 * @param pi8 Where to store the converted number. (optional)
801 */
802RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
803
804/**
805 * Converts a string representation of a number to a 8-bit signed number.
806 * The base is guessed.
807 *
808 * @returns 8-bit signed number on success.
809 * @returns 0 on failure.
810 * @param pszValue Pointer to the string value.
811 */
812RTDECL(int8_t) RTStrToInt8(const char *pszValue);
813
814/**
815 * Performs a case insensitive string compare between two UTF-8 strings.
816 *
817 * This is a simplified compare, as only the simplified lower/upper case folding
818 * specified by the unicode specs are used. It does not consider character pairs
819 * as they are used in some languages, just simple upper & lower case compares.
820 *
821 * @returns < 0 if the first string is less than the second string.
822 * @returns 0 if the first string is identical to the second string.
823 * @returns > 0 if the first string is greater than the second string.
824 * @param psz1 First UTF-8 string.
825 * @param psz2 Second UTF-8 string.
826 */
827RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
828
829/** @} */
830
831
832/** @defgroup rt_str_space Unique String Space
833 * @ingroup grp_rt_str
834 * @{
835 */
836
/** Pointer to a string name space container node core. */
typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
/** Pointer to a pointer to a string name space container node core. */
typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
841
/**
 * String name space container node core.
 *
 * Fields marked "Don't touch" are bookkeeping for the container; the string
 * length and pointer are marked read only.
 */
typedef struct RTSTRSPACECORE
{
    /** Hash key. Don't touch. */
    uint32_t        Key;
    /** Pointer to the left leaf node. Don't touch. */
    PRTSTRSPACECORE pLeft;
    /** Pointer to the right leaf node. Don't touch. */
    PRTSTRSPACECORE pRight;
    /** Pointer to the list of strings with the same key. Don't touch. */
    PRTSTRSPACECORE pList;
    /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
    unsigned char   uchHeight;
    /** The string length. Read only! */
    size_t          cchString;
    /** Pointer to the string. Read only! */
    const char *    pszString;
} RTSTRSPACECORE;
862
/** String space. (Initialize with NULL.) */
typedef PRTSTRSPACECORE RTSTRSPACE;
/** Pointer to a string space. */
typedef PPRTSTRSPACECORE PRTSTRSPACE;
867
868
/**
 * Inserts a string into a unique string space.
 *
 * @returns true on success.
 * @returns false if the string collided with an existing string.
 * @param   pStrSpace       The space to insert it into.
 * @param   pStr            The string node.
 */
RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
878
/**
 * Removes a string from a unique string space.
 *
 * @returns Pointer to the removed string node.
 * @returns NULL if the string was not found in the string space.
 * @param   pStrSpace       The space to remove it from.
 * @param   pszString       The string to remove.
 */
RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
888
/**
 * Gets a string from a unique string space.
 *
 * @returns Pointer to the string node.
 * @returns NULL if the string was not found in the string space.
 * @param   pStrSpace       The space to look the string up in.
 * @param   pszString       The string to get.
 */
RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
898
/**
 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
 *
 * @returns 0 to continue.
 * @returns Non-zero to abort the operation.
 * @param   pStr            The string node.
 * @param   pvUser          The user specified argument.
 */
typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
910
/**
 * Destroys the string space.
 * The caller supplies a callback which will be called for each of
 * the string nodes for freeing their memory and other resources.
 *
 * @returns 0 or whatever non-zero return value pfnCallback returned
 *          when aborting the destruction.
 * @param   pStrSpace       The space to destroy.
 * @param   pfnCallback     The callback.
 * @param   pvUser          The user argument.
 */
RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
923
/**
 * Enumerates the string space.
 * The caller supplies a callback which will be called for each of
 * the string nodes.
 *
 * @returns 0 or whatever non-zero return value pfnCallback returned
 *          when aborting the enumeration.
 * @param   pStrSpace       The space to enumerate.
 * @param   pfnCallback     The callback.
 * @param   pvUser          The user argument.
 */
RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
936
937/** @} */
938
939
940/** @defgroup rt_str_utf16 UTF-16 String Manipulation
941 * @ingroup grp_rt_str
942 * @{
943 */
944
/**
 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
 * RTUtf16Dup() or RTUtf16DupEx().
 *
 * This function returns nothing (void).
 *
 * @param   pwszString      The UTF-16 string to free. NULL is accepted.
 */
RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
953
/**
 * Allocates a new copy of the specified UTF-16 string.
 *
 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
 * @returns NULL when out of memory.
 * @param   pwszString      UTF-16 string to duplicate.
 * @remark  This function will not make any attempt to validate the encoding.
 */
RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
963
/**
 * Allocates a new copy of the specified UTF-16 string, optionally with
 * room for extra RTUTF16 items.
 *
 * @returns iprt status code.
 * @param   ppwszString     Receives the pointer to the allocated UTF-16 string.
 *                          The returned pointer must be freed using RTUtf16Free().
 * @param   pwszString      UTF-16 string to duplicate.
 * @param   cwcExtra        Number of extra RTUTF16 items to allocate.
 * @remark  This function will not make any attempt to validate the encoding.
 */
RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
975
/**
 * Returns the length of a UTF-16 string in UTF-16 characters
 * without trailing '\\0'.
 *
 * Surrogate pairs count as two UTF-16 characters here. Use RTUtf16CpCnt()
 * to get the exact number of code points in the string.
 *
 * @returns The number of RTUTF16 items in the string.
 * @param   pwszString  Pointer to the UTF-16 string.
 * @remark  This function will not make any attempt to validate the encoding.
 */
RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
988
/**
 * Performs a case sensitive string compare between two UTF-16 strings.
 *
 * @returns < 0 if the first string is less than the second string.
 * @returns 0 if the first string is identical to the second string.
 * @returns > 0 if the first string is greater than the second string.
 * @param   pwsz1       First UTF-16 string.
 * @param   pwsz2       Second UTF-16 string.
 * @remark  This function will not make any attempt to validate the encoding.
 */
RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
1000
/**
 * Performs a case insensitive string compare between two UTF-16 strings.
 *
 * This is a simplified compare, as only the simplified lower/upper case folding
 * specified by the unicode specs is used. It does not consider character pairs
 * as they are used in some languages, just simple upper & lower case compares.
 *
 * @returns < 0 if the first string is less than the second string.
 * @returns 0 if the first string is identical to the second string.
 * @returns > 0 if the first string is greater than the second string.
 * @param   pwsz1       First UTF-16 string.
 * @param   pwsz2       Second UTF-16 string.
 */
RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1015
/**
 * Performs a case insensitive string compare between two UTF-16 strings
 * using the current locale of the process (if applicable).
 *
 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
 * required data is available, to do a correct case-insensitive compare. It
 * follows that it is more complex and thereby likely to be more expensive.
 *
 * @returns < 0 if the first string is less than the second string.
 * @returns 0 if the first string is identical to the second string.
 * @returns > 0 if the first string is greater than the second string.
 * @param   pwsz1       First UTF-16 string.
 * @param   pwsz2       Second UTF-16 string.
 */
RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1031
/**
 * Folds a UTF-16 string to lowercase.
 *
 * This is a very simple folding; it uses the simple lowercase
 * code point, it is not related to any locale just the most common
 * lowercase codepoint setup by the unicode specs, and it will not
 * create new surrogate pairs or remove existing ones.
 *
 * @returns Pointer to the passed in string.
 * @param   pwsz        The string to fold.
 */
RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1044
/**
 * Folds a UTF-16 string to uppercase.
 *
 * This is a very simple folding; it uses the simple uppercase
 * code point, it is not related to any locale just the most common
 * uppercase codepoint setup by the unicode specs, and it will not
 * create new surrogate pairs or remove existing ones.
 *
 * @returns Pointer to the passed in string.
 * @param   pwsz        The string to fold.
 */
RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1057
/**
 * Translates a UTF-16 string into UTF-8, allocating the result buffer.
 *
 * @returns iprt status code.
 * @param   pwszString      UTF-16 string to convert.
 * @param   ppszString      Receives the pointer to the allocated UTF-8 string.
 *                          The returned pointer must be freed using RTStrFree().
 */
RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1067
/**
 * Translates UTF-16 to UTF-8 using a buffer provided by the caller or
 * a fittingly sized buffer allocated by the function.
 *
 * @returns iprt status code.
 * @param   pwszString      The UTF-16 string to convert.
 * @param   cwcString       The number of RTUTF16 items to translate from pwszString.
 *                          The translation will stop when reaching cwcString or the terminator ('\\0').
 *                          Use RTSTR_MAX to translate the entire string.
 * @param   ppsz            If cch is non-zero, this must either be pointing to a pointer to
 *                          a buffer of the specified size, or a pointer to a NULL pointer.
 *                          If *ppsz is NULL or cch is zero a buffer of at least cch chars
 *                          will be allocated to hold the translated string.
 *                          If a buffer was requested it must be freed using RTStrFree()
 *                          (the result is a UTF-8 string, like the one RTUtf16ToUtf8() returns).
 * @param   cch             The buffer size in chars (the type). This includes the terminator.
 * @param   pcch            Where to store the length of the translated string. (Optional)
 *                          This field will be updated even on failure, however the value is only
 *                          specified for the following two error codes. On VERR_BUFFER_OVERFLOW
 *                          and VERR_NO_STR_MEMORY it contains the required buffer space.
 */
RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1089
1090
1091/**
1092 * Allocates tmp buffer, translates pwszString from UCS-2 to UTF8.
1093 *
1094 * @returns iprt status code.
1095 * @param ppszString Receives pointer of allocated UTF8 string.
1096 * The returned pointer must be freed using RTStrFree().
1097 * @param pwszString UCS-2 string to convert.
1098 * @deprecated Use RTUtf16ToUtf8().
1099 */
1100DECLINLINE(int) RTStrUcs2ToUtf8(char **ppszString, PCRTUCS2 pwszString)
1101{
1102 return RTUtf16ToUtf8(pwszString, ppszString);
1103}
1104
1105/**
1106 * Translates UCS-2 to UTF-8 using buffer provided by the caller or
1107 * a fittingly sized buffer allocated by the function.
1108 *
1109 * @returns iprt status code.
1110 * @param ppszString If cch is not zero, this points to the pointer to the
1111 * buffer where the converted string shall be resulted.
1112 * If cch is zero, this is where the pointer to the allocated
1113 * buffer with the converted string is stored. The allocated
1114 * buffer must be freed by using RTStrFree().
1115 * @param cch Size of the passed in buffer (*ppszString).
1116 * If 0 a fittingly sized buffer is allocated.
1117 * @param pwszString UCS-2 string to convert.
1118 * @deprecated
1119 */
1120DECLINLINE(int) RTStrUcs2ToUtf8Ex(char **ppszString, size_t cch, PCRTUCS2 pwszString)
1121{
1122 return RTUtf16ToUtf8Ex(pwszString, RTSTR_MAX, ppszString, cch, NULL);
1123}
1124
/**
 * Free a UCS-2 string allocated by RTStrUtf8ToUcs2().
 *
 * Legacy wrapper that hands the pointer to RTUtf16Free(); returns nothing.
 *
 * @param   pwszString      Pointer to buffer with unicode string to free.
 *                          NULL is accepted.
 * @deprecated
 */
DECLINLINE(void) RTStrUcs2Free(PRTUCS2 pwszString)
{
    RTUtf16Free(pwszString);
}
1137
1138/**
1139 * Allocates a new copy of the given UCS-2 string.
1140 *
1141 * @returns Pointer to the allocated string copy. Use RTStrUcs2Free() to free it.
1142 * @returns NULL when out of memory.
1143 * @param pwszString UCS-2 string to duplicate.
1144 * @deprecated
1145 */
1146DECLINLINE(PRTUCS2) RTStrUcs2Dup(PCRTUCS2 pwszString)
1147{
1148 return RTUtf16Dup(pwszString);
1149}
1150
1151/**
1152 * Allocates a new copy of the given UCS-2 string.
1153 *
1154 * @returns iprt status code.
1155 * @param ppwszString Receives pointer of the allocated UCS-2 string.
1156 * The returned pointer must be freed using RTStrUcs2Free().
1157 * @param pwszString UCS-2 string to duplicate.
1158 * @deprecated
1159 */
1160DECLINLINE(int) RTStrUcs2DupEx(PRTUCS2 *ppwszString, PCRTUCS2 pwszString)
1161{
1162 return RTUtf16DupEx(ppwszString, pwszString, 0);
1163}
1164
1165/**
1166 * Returns the length of a UCS-2 string in UCS-2 characters
1167 * without trailing '\\0'.
1168 *
1169 * @returns Length of input string in UCS-2 characters.
1170 * @param pwszString Pointer the UCS-2 string.
1171 * @deprecated
1172 */
1173DECLINLINE(size_t) RTStrUcs2Len(PCRTUCS2 pwszString)
1174{
1175 return RTUtf16Len(pwszString);
1176}
1177
1178/**
1179 * Performs a case sensitive string compare between two UCS-2 strings.
1180 *
1181 * @returns < 0 if the first string less than the second string.
1182 * @returns 0 if the first string identical to the second string.
1183 * @returns > 0 if the first string greater than the second string.
1184 * @param pwsz1 First UCS-2 string.
1185 * @param pwsz2 Second UCS-2 string.
1186 * @deprecated
1187 */
1188DECLINLINE(int) RTStrUcs2Cmp(register PCRTUCS2 pwsz1, register PCRTUCS2 pwsz2)
1189{
1190 return RTUtf16Cmp(pwsz1, pwsz2);
1191}
1192
1193
/**
 * Get the unicode code point at the given string position.
 *
 * @returns unicode code point.
 * @returns RTUNICP_INVALID if the encoding is invalid.
 * @param   pwsz        The string.
 *
 * @remark  This is an internal worker for RTUtf16GetCp(), which calls it for
 *          surrogates and for values of 0xfffe and above.
 */
RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1204
/**
 * Get the unicode code point at the given string position.
 *
 * @returns iprt status code.
 * @param   ppwsz       Pointer to the string pointer. This will be updated to
 *                      point to the char following the current code point.
 * @param   pCp         Where to store the code point.
 *                      RTUNICP_INVALID is stored here on failure.
 *
 * @remark  This is an internal worker for RTUtf16GetCpEx(), which calls it for
 *          surrogates and for values of 0xfffe and above.
 */
RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1217
/**
 * Put the unicode code point at the given string position
 * and return the pointer to the char following it.
 *
 * This function will not consider anything at or following the
 * buffer area pointed to by pwsz. It is therefore not suitable for
 * inserting code points into a string, only appending/overwriting.
 *
 * @returns pointer to the char following the written code point.
 * @param   pwsz        The string.
 * @param   CodePoint   The code point to write.
 *                      This should not be RTUNICP_INVALID or any other character
 *                      out of the UTF-16 range.
 *
 * @remark  This is an internal worker for RTUtf16PutCp().
 */
RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1235
1236/**
1237 * Get the unicode code point at the given string position.
1238 *
1239 * @returns unicode code point.
1240 * @returns RTUNICP_INVALID if the encoding is invalid.
1241 * @param pwsz The string.
1242 *
1243 * @remark We optimize this operation by using an inline function for
1244 * everything which isn't a surrogate pair or an endian indicator.
1245 */
1246DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1247{
1248 const RTUTF16 wc = *pwsz;
1249 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1250 return wc;
1251 return RTUtf16GetCpInternal(pwsz);
1252}
1253
1254/**
1255 * Get the unicode code point at the given string position.
1256 *
1257 * @returns iprt status code.
1258 * @param ppwsz Pointer to the string pointer. This will be updated to
1259 * point to the char following the current code point.
1260 * @param pCp Where to store the code point.
1261 * RTUNICP_INVALID is stored here on failure.
1262 *
1263 * @remark We optimize this operation by using an inline function for
1264 * everything which isn't a surrogate pair or and endian indicator.
1265 */
1266DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1267{
1268 const RTUTF16 wc = **ppwsz;
1269 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1270 {
1271 (*ppwsz)++;
1272 *pCp = wc;
1273 return VINF_SUCCESS;
1274 }
1275 return RTUtf16GetCpExInternal(ppwsz, pCp);
1276}
1277
1278/**
1279 * Put the unicode code point at the given string position
1280 * and return the pointer to the char following it.
1281 *
1282 * This function will not consider anything at or following the the
1283 * buffer area pointed to by pwsz. It is therefore not suitable for
1284 * inserting code points into a string, only appending/overwriting.
1285 *
1286 * @returns pointer to the char following the written code point.
1287 * @param pwsz The string.
1288 * @param CodePoint The code point to write.
1289 * This sould not be RTUNICP_INVALID or any other charater
1290 * out of the UTF-16 range.
1291 *
1292 * @remark We optimize this operation by using an inline function for
1293 * everything which isn't a surrogate pair or and endian indicator.
1294 */
1295DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1296{
1297 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
1298 {
1299 *pwsz++ = (RTUTF16)CodePoint;
1300 return pwsz;
1301 }
1302 return RTUtf16PutCpInternal(pwsz, CodePoint);
1303}
1304
1305/**
1306 * Skips ahead, past the current code point.
1307 *
1308 * @returns Pointer to the char after the current code point.
1309 * @param pwsz Pointer to the current code point.
1310 * @remark This will not move the next valid code point, only past the current one.
1311 */
1312DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1313{
1314 RTUNICP Cp;
1315 RTUtf16GetCpEx(&pwsz, &Cp);
1316 return (PRTUTF16)pwsz;
1317}
1318
/**
 * Skips backwards, to the previous code point.
 *
 * @returns Pointer to the previous code point (as per the summary above; the
 *          implementation is not part of this header - TODO confirm the
 *          behaviour when pwsz equals pwszStart).
 * @param   pwszStart   Pointer to the start of the string.
 * @param   pwsz        Pointer to the current code point.
 */
RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1327
1328
1329/**
1330 * Checks if the UTF-16 char is the high surrogate char (i.e.
1331 * the 1st char in the pair).
1332 *
1333 * @returns true if it is.
1334 * @returns false if it isn't.
1335 * @param wc The character to investigate.
1336 */
1337DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1338{
1339 return wc >= 0xd800 && wc <= 0xdbff;
1340}
1341
1342/**
1343 * Checks if the UTF-16 char is the low surrogate char (i.e.
1344 * the 2nd char in the pair).
1345 *
1346 * @returns true if it is.
1347 * @returns false if it isn't.
1348 * @param wc The character to investigate.
1349 */
1350DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1351{
1352 return wc >= 0xdc00 && wc <= 0xdfff;
1353}
1354
1355
1356/**
1357 * Checks if the two UTF-16 chars form a valid surrogate pair.
1358 *
1359 * @returns true if they do.
1360 * @returns false if they doesn't.
1361 * @param wcHigh The high (1st) character.
1362 * @param wcLow The low (2nd) character.
1363 */
1364DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1365{
1366 return RTUtf16IsHighSurrogate(wcHigh)
1367 && RTUtf16IsLowSurrogate(wcLow);
1368}
1369
1370/** @} */
1371
1372__END_DECLS
1373
1374/** @} */
1375
1376#endif
1377
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette