VirtualBox

source: vbox/trunk/include/iprt/string.h@ 3636

Last change on this file since 3636 was 3636, checked in by vboxsync, 17 years ago

AMD64 -> RT_ARCH_AMD64; X86 -> RT_ARCH_X86; [OS] (except LINUX) -> RT_OS_[OS].

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 49.8 KB
Line 
1/** @file
2 * innotek Portable Runtime - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef ___iprt_string_h
22#define ___iprt_string_h
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26#include <iprt/stdarg.h>
27#include <iprt/err.h> /* for VINF_SUCCESS */
28#if defined(__LINUX__) && defined(__KERNEL__)
29# include <linux/string.h>
30#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
31 /*
32 * Kludge for the FreeBSD kernel:
33 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
34 * it up while including string.h to keep things quiet. It's nothing
35 * important that's clashing, after all.
36 */
37# define strdup strdup_string_h
38# include <string.h>
39# undef strdup
40#else
41# include <string.h>
42#endif
43
44/*
45 * Supply prototypes for standard string functions provided by
46 * IPRT instead of the operating environment.
47 */
48#if defined(RT_OS_DARWIN) && defined(KERNEL)
49__BEGIN_DECLS
50void *memchr(const void *pv, int ch, size_t cb);
51char *strpbrk(const char *pszStr, const char *pszChars);
52__END_DECLS
53#endif
54
55
56/** @defgroup grp_rt_str RTStr - String Manipulation
57 * Mostly UTF-8 related helpers where the standard string functions won't do.
58 * @ingroup grp_rt
59 * @{
60 */
61
62__BEGIN_DECLS
63
64
65/**
66 * The maximum string length.
67 */
68#define RTSTR_MAX (~(size_t)0)
69
70
71#ifdef IN_RING3
72
73/**
74 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
75 *
76 * @returns iprt status code.
77 * @param ppszString Receives pointer of allocated native CP string.
78 * The returned pointer must be freed using RTStrFree().
79 * @param pszString UTF-8 string to convert.
80 */
81RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
82
83/**
84 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
85 *
86 * @returns iprt status code.
87 * @param ppszString Receives pointer of allocated UTF-8 string.
88 * The returned pointer must be freed using RTStrFree().
89 * @param pszString Native string to convert.
90 */
91RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
92
93#endif
94
95/**
96 * Free string allocated by any of the non-UCS-2 string functions.
97 *
98 * @returns Nothing (the function is declared void).
99 * @param pszString Pointer to buffer with string to free.
100 * NULL is accepted.
101 */
102RTDECL(void) RTStrFree(char *pszString);
103
104/**
105 * Allocates a new copy of the given UTF-8 string.
106 *
107 * @returns Pointer to the allocated UTF-8 string.
108 * @param pszString UTF-8 string to duplicate.
109 */
110RTDECL(char *) RTStrDup(const char *pszString);
111
112/**
113 * Allocates a new copy of the given UTF-8 string.
114 *
115 * @returns iprt status code.
116 * @param ppszString Receives pointer of the allocated UTF-8 string.
117 * The returned pointer must be freed using RTStrFree().
118 * @param pszString UTF-8 string to duplicate.
119 */
120RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
121
122/**
123 * Gets the number of code points the string is made up of, excluding
124 * the terminator.
125 *
126 *
127 * @returns Number of code points (RTUNICP).
128 * @returns 0 if the string was incorrectly encoded.
129 * @param psz The string.
130 */
131RTDECL(size_t) RTStrUniLen(const char *psz);
132
133/**
134 * Gets the number of code points the string is made up of, excluding
135 * the terminator.
136 *
137 * This function will validate the string, and incorrectly encoded UTF-8
138 * strings will be rejected.
139 *
140 * @returns iprt status code.
141 * @param psz The string.
142 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
143 * @param pcuc Where to store the code point count.
144 * This is undefined on failure.
145 */
146RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
147
148/**
149 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
150 *
151 * @returns iprt status code.
152 * @param pszString UTF-8 string to convert.
153 * @param ppUniString Receives pointer to the allocated unicode string.
154 * The returned string must be freed using RTUniFree().
155 */
156RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
157
158/**
159 * Translates pszString from UTF-8 to an array of code points, allocating the result
160 * array if requested.
161 *
162 * @returns iprt status code.
163 * @param pszString UTF-8 string to convert.
164 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
165 * when it reaches cchString or the string terminator ('\\0').
166 * Use RTSTR_MAX to translate the entire string.
167 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
168 * a buffer of the specified size, or pointer to a NULL pointer.
169 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
170 * will be allocated to hold the translated string.
171 * If a buffer was requested it must be freed using RTUtf16Free().
172 * @param cCps The number of code points in the unicode string. This includes the terminator.
173 * @param pcCps Where to store the length of the translated string. (Optional)
174 * This field will be updated even on failure, however the value is only
175 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
176 * and VERR_NO_STR_MEMORY it contains the required buffer space.
177 */
178RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
179
180/**
181 * Calculates the length of the string in RTUTF16 items.
182 *
183 * This function will validate the string, and incorrectly encoded UTF-8
184 * strings will be rejected. The primary purpose of this function is to
185 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
186 * other purposes RTStrCalcUtf16LenEx() should be used.
187 *
188 * @returns Number of RTUTF16 items.
189 * @returns 0 if the string was incorrectly encoded.
190 * @param psz The string.
191 */
192RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
193
194/**
195 * Calculates the length of the string in RTUTF16 items.
196 *
197 * This function will validate the string, and incorrectly encoded UTF-8
198 * strings will be rejected.
199 *
200 * @returns iprt status code.
201 * @param psz The string.
202 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
203 * @param pcwc Where to store the string length. Optional.
204 * This is undefined on failure.
205 */
206RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
207
208/**
209 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
210 *
211 * @returns iprt status code.
212 * @param pszString UTF-8 string to convert.
213 * @param ppwszString Receives pointer to the allocated UTF-16 string.
214 * The returned string must be freed using RTUtf16Free().
215 */
216RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
217
218/**
219 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
220 *
221 * @returns iprt status code.
222 * @param pszString UTF-8 string to convert.
223 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
224 * when it reaches cchString or the string terminator ('\\0').
225 * Use RTSTR_MAX to translate the entire string.
226 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
227 * a buffer of the specified size, or pointer to a NULL pointer.
228 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
229 * will be allocated to hold the translated string.
230 * If a buffer was requested it must be freed using RTUtf16Free().
231 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
232 * @param pcwc Where to store the length of the translated string. (Optional)
233 * This field will be updated even on failure, however the value is only
234 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
235 * and VERR_NO_STR_MEMORY it contains the required buffer space.
236 */
237RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
238
239/**
240 * Allocates tmp buffer, translates pszString from UTF8 to UCS-2.
241 *
242 * @returns iprt status code.
243 * @param ppwszString Receives pointer of allocated UCS-2 string.
244 * The returned pointer must be freed using RTStrUcs2Free().
245 * @param pszString UTF-8 string to convert.
246 * @deprecated Use RTStrToUtf16().
247 */
248DECLINLINE(int) RTStrUtf8ToUcs2(PRTUCS2 *ppwszString, const char *pszString)
249{
250 return RTStrToUtf16(pszString, ppwszString);
251}
252
253/**
254 * Translates pszString from UTF8 to backwater UCS-2, can allocate a temp buffer.
255 *
256 * @returns iprt status code.
257 * @param ppwszString Receives pointer of allocated UCS-2 string.
258 * The returned pointer must be freed using RTStrUcs2Free().
259 * @param cwc Length of target buffer in RTUCS2s including the trailing '\\0'.
260 * If 0 a temporary buffer is allocated.
261 * @param pszString UTF-8 string to convert.
262 * @deprecated Use RTStrToUtf16Ex().
263 */
264DECLINLINE(int) RTStrUtf8ToUcs2Ex(PRTUCS2 *ppwszString, unsigned cwc, const char *pszString)
265{
266 return RTStrToUtf16Ex(pszString, RTSTR_MAX, ppwszString, cwc, NULL);
267}
268
269
270/**
271 * Get the unicode code point at the given string position.
272 *
273 * @returns unicode code point.
274 * @returns RTUNICP_INVALID if the encoding is invalid.
275 * @param psz The string.
276 */
277RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
278
279/**
280 * Get the unicode code point at the given string position.
281 *
282 * @returns iprt status code.
283 * RTUNICP_INVALID is stored in *pCp if the encoding is invalid.
284 * @param ppsz The string.
285 * @param pCp Where to store the unicode code point.
286 */
287RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
288
289/**
290 * Put the unicode code point at the given string position
291 * and return the pointer to the char following it.
292 *
293 * This function will not consider anything at or following the
294 * buffer area pointed to by psz. It is therefore not suitable for
295 * inserting code points into a string, only appending/overwriting.
296 *
297 * @returns pointer to the char following the written code point.
298 * @param psz The string.
299 * @param CodePoint The code point to write.
300 * This should not be RTUNICP_INVALID or any other character
301 * out of the UTF-8 range.
302 *
303 * @remark This is a worker function for RTStrPutCp().
304 *
305 */
306RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
307
308/**
309 * Get the unicode code point at the given string position.
310 *
311 * @returns unicode code point.
312 * @returns RTUNICP_INVALID if the encoding is invalid.
313 * @param psz The string.
314 *
315 * @remark We optimize this operation by using an inline function for
316 * the most frequent and simplest sequence, the rest is
317 * handled by RTStrGetCpInternal().
318 */
319DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
320{
321 const unsigned char uch = *(const unsigned char *)psz;
322 if (!(uch & BIT(7)))
323 return uch;
324 return RTStrGetCpInternal(psz);
325}
326
327/**
328 * Get the unicode code point at the given string position.
329 *
330 * @returns iprt status code.
331 * @param ppsz Pointer to the string pointer. This will be updated to
332 * point to the char following the current code point.
333 * @param pCp Where to store the code point.
334 * RTUNICP_INVALID is stored here on failure.
335 *
336 * @remark We optimize this operation by using an inline function for
337 * the most frequent and simplest sequence, the rest is
338 * handled by RTStrGetCpExInternal().
339 */
340DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
341{
342 const unsigned char uch = **(const unsigned char **)ppsz;
343 if (!(uch & BIT(7)))
344 {
345 (*ppsz)++;
346 *pCp = uch;
347 return VINF_SUCCESS;
348 }
349 return RTStrGetCpExInternal(ppsz, pCp);
350}
351
352/**
353 * Put the unicode code point at the given string position
354 * and return the pointer to the char following it.
355 *
356 * This function will not consider anything at or following the
357 * buffer area pointed to by psz. It is therefore not suitable for
358 * inserting code points into a string, only appending/overwriting.
359 *
360 * @returns pointer to the char following the written code point.
361 * @param psz The string.
362 * @param CodePoint The code point to write.
363 * This should not be RTUNICP_INVALID or any other character
364 * out of the UTF-8 range.
365 *
366 * @remark We optimize this operation by using an inline function for
367 * the most frequent and simplest sequence, the rest is
368 * handled by RTStrPutCpInternal().
369 */
370DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
371{
372 if (CodePoint < 0x80)
373 {
374 *psz++ = (unsigned char)CodePoint;
375 return psz;
376 }
377 return RTStrPutCpInternal(psz, CodePoint);
378}
379
380/**
381 * Skips ahead, past the current code point.
382 *
383 * @returns Pointer to the char after the current code point.
384 * @param psz Pointer to the current code point.
385 * @remark This will not move to the next valid code point, only past the current one.
386 */
387DECLINLINE(char *) RTStrNextCp(const char *psz)
388{
389 RTUNICP Cp;
390 RTStrGetCpEx(&psz, &Cp);
391 return (char *)psz;
392}
393
394/**
395 * Skips back to the previous code point.
396 *
397 * @returns Pointer to the char before the current code point.
398 * @returns pszStart on failure.
399 * @param pszStart Pointer to the start of the string.
400 * @param psz Pointer to the current code point.
401 */
402RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
403
404
405
406#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
407#define DECLARED_FNRTSTROUTPUT
408/**
409 * Output callback.
410 *
411 * @returns number of bytes written.
412 * @param pvArg User argument.
413 * @param pachChars Pointer to an array of utf-8 characters.
414 * @param cbChars Number of bytes in the character array pointed to by pachChars.
415 */
416typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
417/** Pointer to callback function. */
418typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
419#endif
420
/** Format flag.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/* Value-width flags; RTSTR_F_BIT_MASK is the OR of all five of them. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag (one of the RTSTR_F_*BIT values) for the specified type.
 *
 * sizeof() yields a size in bytes, so it is scaled to bits before being
 * compared against the 8/16/32/64/128 bit widths.  Evaluates to 0 when the
 * type has none of those widths.
 *
 * @param   type    The type (or expression) whose width selects the flag.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
453
454
455/**
456 * Callback to format non-standard format specifiers.
457 *
458 * @returns The number of bytes formatted.
459 * @param pvArg Formatter argument.
460 * @param pfnOutput Pointer to output function.
461 * @param pvArgOutput Argument for the output function.
462 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
463 * after the format specifier.
464 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
465 * @param cchWidth Format Width. -1 if not specified.
466 * @param cchPrecision Format Precision. -1 if not specified.
467 * @param fFlags Flags (RTSTR_F_*).
468 * @param chArgSize The argument size specifier, 'l' or 'L'.
469 */
470typedef DECLCALLBACK(int) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
471 const char **ppszFormat, va_list *pArgs, int cchWidth, int cchPrecision, unsigned fFlags, char chArgSize);
472/** Pointer to a FNSTRFORMAT() function. */
473typedef FNSTRFORMAT *PFNSTRFORMAT;
474
475
476/**
477 * Partial implementation of a printf like formatter.
478 * It doesn't do everything correct, and there is no floating point support.
479 * However, it supports custom formats by the means of a format callback.
480 *
481 * @returns number of bytes formatted.
482 * @param pfnOutput Output worker.
483 * Called in two ways. Normally with a string and its length.
484 * For termination, it's called with NULL for string, 0 for length.
485 * @param pvArgOutput Argument to the output worker.
486 * @param pfnFormat Custom format worker.
487 * @param pvArgFormat Argument to the format worker.
488 * @param pszFormat Format string pointer.
489 * @param args Argument list.
490 */
491RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list args);
492
493/**
494 * Partial implementation of a printf like formatter.
495 * It doesn't do everything correct, and there is no floating point support.
496 * However, it supports custom formats by the means of a format callback.
497 *
498 * @returns number of bytes formatted.
499 * @param pfnOutput Output worker.
500 * Called in two ways. Normally with a string and its length.
501 * For termination, it's called with NULL for string, 0 for length.
502 * @param pvArgOutput Argument to the output worker.
503 * @param pfnFormat Custom format worker.
504 * @param pvArgFormat Argument to the format worker.
505 * @param pszFormat Format string.
506 * @param ... Argument list.
507 */
508RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
509
510/**
511 * Formats an integer number according to the parameters.
512 *
513 * @returns Length of the formatted number.
514 * @param psz Pointer to output string buffer of sufficient size.
515 * @param u64Value Value to format.
516 * @param uiBase Number representation base.
517 * @param cchWidth Width.
518 * @param cchPrecision Precision.
519 * @param fFlags Flags (RTSTR_F_*).
520 */
521RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
522
523/**
524 * String printf.
525 *
526 * @returns The length of the returned string (in pszBuffer).
527 * @param pszBuffer Output buffer.
528 * @param cchBuffer Size of the output buffer.
529 * @param pszFormat The format string.
530 * @param args The format argument.
531 */
532RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
533
534/**
535 * String printf.
536 *
537 * @returns The length of the returned string (in pszBuffer).
538 * @param pszBuffer Output buffer.
539 * @param cchBuffer Size of the output buffer.
540 * @param pszFormat The format string.
541 * @param ... The format argument.
542 */
543RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
544
545
546/**
547 * String printf with custom formatting.
548 *
549 * @returns The length of the returned string (in pszBuffer).
550 * @param pfnFormat Pointer to handler function for the custom formats.
551 * @param pvArg Argument to the pfnFormat function.
552 * @param pszBuffer Output buffer.
553 * @param cchBuffer Size of the output buffer.
554 * @param pszFormat The format string.
555 * @param args The format argument.
556 */
557RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
558
559/**
560 * String printf with custom formatting.
561 *
562 * @returns The length of the returned string (in pszBuffer).
563 * @param pfnFormat Pointer to handler function for the custom formats.
564 * @param pvArg Argument to the pfnFormat function.
565 * @param pszBuffer Output buffer.
566 * @param cchBuffer Size of the output buffer.
567 * @param pszFormat The format string.
568 * @param ... The format argument.
569 */
570RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
571
572
573/**
574 * Allocating string printf.
575 *
576 * @returns The length of the string in the returned *ppszBuffer.
577 * @returns -1 on failure.
578 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
579 * The buffer should be freed using RTStrFree().
580 * On failure *ppszBuffer will be set to NULL.
581 * @param pszFormat The format string.
582 * @param args The format argument.
583 */
584RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
585
586/**
587 * Allocating string printf.
588 *
589 * @returns The length of the string in the returned *ppszBuffer.
590 * @returns -1 on failure.
591 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
592 * The buffer should be freed using RTStrFree().
593 * On failure *ppszBuffer will be set to NULL.
594 * @param pszFormat The format string.
595 * @param ... The format argument.
596 */
597RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
598
599
600/**
601 * Strips blankspaces from both ends of the string.
602 *
603 * @returns Pointer to first non-blank char in the string.
604 * @param psz The string to strip.
605 */
606RTDECL(char *) RTStrStrip(char *psz);
607
608/**
609 * Strips blankspaces from the start of the string.
610 *
611 * @returns Pointer to first non-blank char in the string.
612 * @param psz The string to strip.
613 */
614RTDECL(char *) RTStrStripL(const char *psz);
615
616/**
617 * Strips blankspaces from the end of the string.
618 *
619 * @returns psz.
620 * @param psz The string to strip.
621 */
622RTDECL(char *) RTStrStripR(char *psz);
623
624
625/** @defgroup rt_str_conv String To/From Number Conversions
626 * @ingroup grp_rt_str
627 * @{ */
628
629/**
630 * Converts a string representation of a number to a 64-bit unsigned number.
631 *
632 * @returns iprt status code.
633 * Warnings are used to indicate conversion problems.
634 * @param pszValue Pointer to the string value.
635 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
636 * @param uBase The base of the representation used.
637 * If 0 the function will look for known prefixes before defaulting to 10.
638 * @param pu64 Where to store the converted number. (optional)
639 */
640RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
641
642/**
643 * Converts a string representation of a number to a 64-bit unsigned number.
644 * The base is guessed.
645 *
646 * @returns 64-bit unsigned number on success.
647 * @returns 0 on failure.
648 * @param pszValue Pointer to the string value.
649 */
650RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
651
652/**
653 * Converts a string representation of a number to a 32-bit unsigned number.
654 *
655 * @returns iprt status code.
656 * Warnings are used to indicate conversion problems.
657 * @param pszValue Pointer to the string value.
658 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
659 * @param uBase The base of the representation used.
660 * If 0 the function will look for known prefixes before defaulting to 10.
661 * @param pu32 Where to store the converted number. (optional)
662 */
663RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
664
665/**
666 * Converts a string representation of a number to a 32-bit unsigned number.
667 * The base is guessed.
668 *
669 * @returns 32-bit unsigned number on success.
670 * @returns 0 on failure.
671 * @param pszValue Pointer to the string value.
672 */
673RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
674
675/**
676 * Converts a string representation of a number to a 16-bit unsigned number.
677 *
678 * @returns iprt status code.
679 * Warnings are used to indicate conversion problems.
680 * @param pszValue Pointer to the string value.
681 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
682 * @param uBase The base of the representation used.
683 * If 0 the function will look for known prefixes before defaulting to 10.
684 * @param pu16 Where to store the converted number. (optional)
685 */
686RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
687
688/**
689 * Converts a string representation of a number to a 16-bit unsigned number.
690 * The base is guessed.
691 *
692 * @returns 16-bit unsigned number on success.
693 * @returns 0 on failure.
694 * @param pszValue Pointer to the string value.
695 */
696RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
697
698/**
699 * Converts a string representation of a number to a 8-bit unsigned number.
700 *
701 * @returns iprt status code.
702 * Warnings are used to indicate conversion problems.
703 * @param pszValue Pointer to the string value.
704 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
705 * @param uBase The base of the representation used.
706 * If 0 the function will look for known prefixes before defaulting to 10.
707 * @param pu8 Where to store the converted number. (optional)
708 */
709RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
710
711/**
712 * Converts a string representation of a number to a 8-bit unsigned number.
713 * The base is guessed.
714 *
715 * @returns 8-bit unsigned number on success.
716 * @returns 0 on failure.
717 * @param pszValue Pointer to the string value.
718 */
719RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
720
721/**
722 * Converts a string representation of a number to a 64-bit signed number.
723 *
724 * @returns iprt status code.
725 * Warnings are used to indicate conversion problems.
726 * @param pszValue Pointer to the string value.
727 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
728 * @param uBase The base of the representation used.
729 * If 0 the function will look for known prefixes before defaulting to 10.
730 * @param pi64 Where to store the converted number. (optional)
731 */
732RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
733
734/**
735 * Converts a string representation of a number to a 64-bit signed number.
736 * The base is guessed.
737 *
738 * @returns 64-bit signed number on success.
739 * @returns 0 on failure.
740 * @param pszValue Pointer to the string value.
741 */
742RTDECL(int64_t) RTStrToInt64(const char *pszValue);
743
744/**
745 * Converts a string representation of a number to a 32-bit signed number.
746 *
747 * @returns iprt status code.
748 * Warnings are used to indicate conversion problems.
749 * @param pszValue Pointer to the string value.
750 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
751 * @param uBase The base of the representation used.
752 * If 0 the function will look for known prefixes before defaulting to 10.
753 * @param pi32 Where to store the converted number. (optional)
754 */
755RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
756
757/**
758 * Converts a string representation of a number to a 32-bit signed number.
759 * The base is guessed.
760 *
761 * @returns 32-bit signed number on success.
762 * @returns 0 on failure.
763 * @param pszValue Pointer to the string value.
764 */
765RTDECL(int32_t) RTStrToInt32(const char *pszValue);
766
767/**
768 * Converts a string representation of a number to a 16-bit signed number.
769 *
770 * @returns iprt status code.
771 * Warnings are used to indicate conversion problems.
772 * @param pszValue Pointer to the string value.
773 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
774 * @param uBase The base of the representation used.
775 * If 0 the function will look for known prefixes before defaulting to 10.
776 * @param pi16 Where to store the converted number. (optional)
777 */
778RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
779
780/**
781 * Converts a string representation of a number to a 16-bit signed number.
782 * The base is guessed.
783 *
784 * @returns 16-bit signed number on success.
785 * @returns 0 on failure.
786 * @param pszValue Pointer to the string value.
787 */
788RTDECL(int16_t) RTStrToInt16(const char *pszValue);
789
790/**
791 * Converts a string representation of a number to a 8-bit signed number.
792 *
793 * @returns iprt status code.
794 * Warnings are used to indicate conversion problems.
795 * @param pszValue Pointer to the string value.
796 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
797 * @param uBase The base of the representation used.
798 * If 0 the function will look for known prefixes before defaulting to 10.
799 * @param pi8 Where to store the converted number. (optional)
800 */
801RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
802
803/**
804 * Converts a string representation of a number to a 8-bit signed number.
805 * The base is guessed.
806 *
807 * @returns 8-bit signed number on success.
808 * @returns 0 on failure.
809 * @param pszValue Pointer to the string value.
810 */
811RTDECL(int8_t) RTStrToInt8(const char *pszValue);
812
813/**
814 * Performs a case insensitive string compare between two UTF-8 strings.
815 *
816 * This is a simplified compare, as only the simplified lower/upper case folding
817 * specified by the unicode specs are used. It does not consider character pairs
818 * as they are used in some languages, just simple upper & lower case compares.
819 *
820 * @returns < 0 if the first string less than the second string.
821 * @returns 0 if the first string identical to the second string.
822 * @returns > 0 if the first string greater than the second string.
823 * @param psz1 First UTF-8 string.
824 * @param psz2 Second UTF-8 string.
825 */
826RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
827
828/** @} */
829
830
831/** @defgroup rt_str_space Unique String Space
832 * @ingroup grp_rt_str
833 * @{
834 */
835
836/** Pointer to a string name space container node core. */
837typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
838/** Pointer to a pointer to a string name space container node core. */
839typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
840
/**
 * Core of a node in a string name space container.
 *
 * The tree and list members are maintained by the string space code and
 * must not be modified by the user; the string members are read only.
 */
typedef struct RTSTRSPACECORE
{
    /** The hash key. Don't touch. */
    uint32_t                Key;
    /** The left leaf node. Don't touch. */
    struct RTSTRSPACECORE  *pLeft;
    /** The right leaf node. Don't touch. */
    struct RTSTRSPACECORE  *pRight;
    /** List of strings sharing the same key. Don't touch. */
    struct RTSTRSPACECORE  *pList;
    /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
    unsigned char           uchHeight;
    /** Length of the string. Read only! */
    size_t                  cchString;
    /** The string itself. Read only! */
    const char             *pszString;
} RTSTRSPACECORE;
861
862/** String space. (Initialize with NULL.) */
863typedef PRTSTRSPACECORE RTSTRSPACE;
864/** Pointer to a string space. */
865typedef PPRTSTRSPACECORE PRTSTRSPACE;
866
867
868/**
869 * Inserts a string into a unique string space.
870 *
871 * @returns true on success.
872 * @returns false if the string collided with an existing string.
873 * @param pStrSpace The space to insert it into.
874 * @param pStr The string node.
875 */
876RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
877
878/**
879 * Removes a string from a unique string space.
880 *
881 * @returns Pointer to the removed string node.
882 * @returns NULL if the string was not found in the string space.
883 * @param pStrSpace The space to remove it from.
884 * @param pszString The string to remove.
885 */
886RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
887
888/**
889 * Gets a string from a unique string space.
890 *
891 * @returns Pointer to the string node.
892 * @returns NULL if the string was not found in the string space.
893 * @param pStrSpace The space to look it up in.
894 * @param pszString The string to get.
895 */
896RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
897
898/**
899 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
900 *
901 * @returns 0 on continue.
902 * @returns Non-zero to abort the operation.
903 * @param pStr The string node
904 * @param pvUser The user specified argument.
905 */
906typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
907/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
908typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
909
910/**
911 * Destroys the string space.
912 * The caller supplies a callback which will be called for each of
913 * the string nodes in it, for freeing their memory and other resources.
914 *
915 * @returns 0 or what ever non-zero return value pfnCallback returned
916 * when aborting the destruction.
917 * @param pStrSpace The space to destroy.
918 * @param pfnCallback The callback.
919 * @param pvUser The user argument.
920 */
921RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
922
923/**
924 * Enumerates the string space.
925 * The caller supplies a callback which will be called for each of
926 * the string nodes.
927 *
928 * @returns 0 or what ever non-zero return value pfnCallback returned
929 * when aborting the enumeration.
930 * @param pStrSpace The space to enumerate.
931 * @param pfnCallback The callback.
932 * @param pvUser The user argument.
933 */
934RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
935
936/** @} */
937
938
939/** @defgroup rt_str_utf16 UTF-16 String Manipulation
940 * @ingroup grp_rt_str
941 * @{
942 */
943
944/**
945 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
946 * RTUtf16Dup() or RTUtf16DupEx().
947 *
948 * @returns Nothing (void).
949 * @param pwszString The UTF-16 string to free. NULL is accepted.
950 */
951RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
952
953/**
954 * Allocates a new copy of the specified UTF-16 string.
955 *
956 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
957 * @returns NULL when out of memory.
958 * @param pwszString UTF-16 string to duplicate.
959 * @remark This function will not make any attempt to validate the encoding.
960 */
961RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
962
963/**
964 * Allocates a new copy of the specified UTF-16 string.
965 *
966 * @returns iprt status code.
967 * @param ppwszString Receives pointer of the allocated UTF-16 string.
968 * The returned pointer must be freed using RTUtf16Free().
969 * @param pwszString UTF-16 string to duplicate.
970 * @param cwcExtra Number of extra RTUTF16 items to allocate.
971 * @remark This function will not make any attempt to validate the encoding.
972 */
973RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
974
975/**
976 * Returns the length of a UTF-16 string in UTF-16 characters
977 * without trailing '\\0'.
978 *
979 * Surrogate pairs counts as two UTF-16 characters here. Use RTUtf16CpCnt()
980 * to get the exact number of code points in the string.
981 *
982 * @returns The number of RTUTF16 items in the string.
983 * @param pwszString Pointer to the UTF-16 string.
984 * @remark This function will not make any attempt to validate the encoding.
985 */
986RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
987
988/**
989 * Performs a case sensitive string compare between two UTF-16 strings.
990 *
991 * @returns < 0 if the first string less than the second string.
992 * @returns 0 if the first string identical to the second string.
993 * @returns > 0 if the first string greater than the second string.
994 * @param pwsz1 First UTF-16 string.
995 * @param pwsz2 Second UTF-16 string.
996 * @remark This function will not make any attempt to validate the encoding.
997 */
998RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
999
1000/**
1001 * Performs a case insensitive string compare between two UTF-16 strings.
1002 *
1003 * This is a simplified compare, as only the simplified lower/upper case folding
1004 * specified by the unicode specs are used. It does not consider character pairs
1005 * as they are used in some languages, just simple upper & lower case compares.
1006 *
1007 * @returns < 0 if the first string less than the second string.
1008 * @returns 0 if the first string identical to the second string.
1009 * @returns > 0 if the first string greater than the second string.
1010 * @param pwsz1 First UTF-16 string.
1011 * @param pwsz2 Second UTF-16 string.
1012 */
1013RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1014
1015/**
1016 * Performs a case insensitive string compare between two UTF-16 strings
1017 * using the current locale of the process (if applicable).
1018 *
1019 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
1020 * required data is available, to do a correct case-insensitive compare. It
1021 * follows that it is more complex and thereby likely to be more expensive.
1022 *
1023 * @returns < 0 if the first string less than the second string.
1024 * @returns 0 if the first string identical to the second string.
1025 * @returns > 0 if the first string greater than the second string.
1026 * @param pwsz1 First UTF-16 string.
1027 * @param pwsz2 Second UTF-16 string.
1028 */
1029RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1030
1031/**
1032 * Folds a UTF-16 string to lowercase.
1033 *
1034 * This is a very simple folding; it uses the simple lowercase
1035 * code point, it is not related to any locale just the most common
1036 * lowercase codepoint setup by the unicode specs, and it will not
1037 * create new surrogate pairs or remove existing ones.
1038 *
1039 * @returns Pointer to the passed in string.
1040 * @param pwsz The string to fold.
1041 */
1042RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1043
1044/**
1045 * Folds a UTF-16 string to uppercase.
1046 *
1047 * This is a very simple folding; it uses the simple uppercase
1048 * code point, it is not related to any locale just the most common
1049 * uppercase codepoint setup by the unicode specs, and it will not
1050 * create new surrogate pairs or remove existing ones.
1051 *
1052 * @returns Pointer to the passed in string.
1053 * @param pwsz The string to fold.
1054 */
1055RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1056
1057/**
1058 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
1059 *
1060 * @returns iprt status code.
1061 * @param pwszString UTF-16 string to convert.
1062 * @param ppszString Receives pointer of allocated UTF-8 string.
1063 * The returned pointer must be freed using RTStrFree().
1064 */
1065RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1066
1067/**
1068 * Translates UTF-16 to UTF-8 using buffer provided by the caller or
1069 * a fittingly sized buffer allocated by the function.
1070 *
1071 * @returns iprt status code.
1072 * @param pwszString The UTF-16 string to convert.
1073 * @param cwcString The number of RTUTF16 items to translate from pwszString.
1074 * The translation will stop when reaching cwcString or the terminator ('\\0').
1075 * Use RTSTR_MAX to translate the entire string.
1076 * @param ppsz If cch is non-zero, this must either be pointing to pointer to
1077 * a buffer of the specified size, or pointer to a NULL pointer.
1078 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
1079 * will be allocated to hold the translated string.
1080 * If a buffer was allocated it must be freed using RTStrFree().
1081 * @param cch The buffer size in chars (the type). This includes the terminator.
1082 * @param pcch Where to store the length of the translated string. (Optional)
1083 * This field will be updated even on failure, however the value is only
1084 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
1085 * and VERR_NO_STR_MEMORY it contains the required buffer space.
1086 */
1087RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1088
1089
1090/**
1091 * Allocates tmp buffer, translates pwszString from UCS-2 to UTF8.
1092 *
1093 * @returns iprt status code.
1094 * @param ppszString Receives pointer of allocated UTF8 string.
1095 * The returned pointer must be freed using RTStrFree().
1096 * @param pwszString UCS-2 string to convert.
1097 * @deprecated Use RTUtf16ToUtf8().
1098 */
1099DECLINLINE(int) RTStrUcs2ToUtf8(char **ppszString, PCRTUCS2 pwszString)
1100{
1101 return RTUtf16ToUtf8(pwszString, ppszString);
1102}
1103
1104/**
1105 * Translates UCS-2 to UTF-8 using buffer provided by the caller or
1106 * a fittingly sized buffer allocated by the function.
1107 *
1108 * @returns iprt status code.
1109 * @param ppszString If cch is not zero, this points to the pointer to the
1110 * buffer where the converted string shall be resulted.
1111 * If cch is zero, this is where the pointer to the allocated
1112 * buffer with the converted string is stored. The allocated
1113 * buffer must be freed by using RTStrFree().
1114 * @param cch Size of the passed in buffer (*ppszString).
1115 * If 0 a fittingly sized buffer is allocated.
1116 * @param pwszString UCS-2 string to convert.
1117 * @deprecated
1118 */
1119DECLINLINE(int) RTStrUcs2ToUtf8Ex(char **ppszString, size_t cch, PCRTUCS2 pwszString)
1120{
1121 return RTUtf16ToUtf8Ex(pwszString, RTSTR_MAX, ppszString, cch, NULL);
1122}
1123
1124/**
1125 * Free a UCS-2 string allocated by RTStrUtf8ToUcs2().
1126 *
1127 * @returns iprt status code.
1128 * @param pwszString Pointer to buffer with unicode string to free.
1129 * NULL is accepted.
1130 * @deprecated
1131 */
1132DECLINLINE(void) RTStrUcs2Free(PRTUCS2 pwszString)
1133{
1134 RTUtf16Free(pwszString);
1135}
1136
1137/**
1138 * Allocates a new copy of the given UCS-2 string.
1139 *
1140 * @returns Pointer to the allocated string copy. Use RTStrUcs2Free() to free it.
1141 * @returns NULL when out of memory.
1142 * @param pwszString UCS-2 string to duplicate.
1143 * @deprecated
1144 */
1145DECLINLINE(PRTUCS2) RTStrUcs2Dup(PCRTUCS2 pwszString)
1146{
1147 return RTUtf16Dup(pwszString);
1148}
1149
1150/**
1151 * Allocates a new copy of the given UCS-2 string.
1152 *
1153 * @returns iprt status code.
1154 * @param ppwszString Receives pointer of the allocated UCS-2 string.
1155 * The returned pointer must be freed using RTStrUcs2Free().
1156 * @param pwszString UCS-2 string to duplicate.
1157 * @deprecated
1158 */
1159DECLINLINE(int) RTStrUcs2DupEx(PRTUCS2 *ppwszString, PCRTUCS2 pwszString)
1160{
1161 return RTUtf16DupEx(ppwszString, pwszString, 0);
1162}
1163
1164/**
1165 * Returns the length of a UCS-2 string in UCS-2 characters
1166 * without trailing '\\0'.
1167 *
1168 * @returns Length of input string in UCS-2 characters.
1169 * @param pwszString Pointer the UCS-2 string.
1170 * @deprecated
1171 */
1172DECLINLINE(size_t) RTStrUcs2Len(PCRTUCS2 pwszString)
1173{
1174 return RTUtf16Len(pwszString);
1175}
1176
1177/**
1178 * Performs a case sensitive string compare between two UCS-2 strings.
1179 *
1180 * @returns < 0 if the first string less than the second string.
1181 * @returns 0 if the first string identical to the second string.
1182 * @returns > 0 if the first string greater than the second string.
1183 * @param pwsz1 First UCS-2 string.
1184 * @param pwsz2 Second UCS-2 string.
1185 * @deprecated
1186 */
1187DECLINLINE(int) RTStrUcs2Cmp(register PCRTUCS2 pwsz1, register PCRTUCS2 pwsz2)
1188{
1189 return RTUtf16Cmp(pwsz1, pwsz2);
1190}
1191
1192
1193/**
1194 * Get the unicode code point at the given string position.
1195 *
1196 * @returns unicode code point.
1197 * @returns RTUNICP_INVALID if the encoding is invalid.
1198 * @param pwsz The string.
1199 *
1200 * @remark This is an internal worker for RTUtf16GetCp().
1201 */
1202RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1203
1204/**
1205 * Get the unicode code point at the given string position.
1206 *
1207 * @returns iprt status code.
1208 * @param ppwsz Pointer to the string pointer. This will be updated to
1209 * point to the char following the current code point.
1210 * @param pCp Where to store the code point.
1211 * RTUNICP_INVALID is stored here on failure.
1212 *
1213 * @remark This is an internal worker for RTUtf16GetCpEx().
1214 */
1215RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1216
1217/**
1218 * Put the unicode code point at the given string position
1219 * and return the pointer to the char following it.
1220 *
1221 * This function will not consider anything at or following the
1222 * buffer area pointed to by pwsz. It is therefore not suitable for
1223 * inserting code points into a string, only appending/overwriting.
1224 *
1225 * @returns pointer to the char following the written code point.
1226 * @param pwsz The string.
1227 * @param CodePoint The code point to write.
1228 * This should not be RTUNICP_INVALID or any other character
1229 * out of the UTF-16 range.
1230 *
1231 * @remark This is an internal worker for RTUtf16PutCp().
1232 */
1233RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1234
1235/**
1236 * Get the unicode code point at the given string position.
1237 *
1238 * @returns unicode code point.
1239 * @returns RTUNICP_INVALID if the encoding is invalid.
1240 * @param pwsz The string.
1241 *
1242 * @remark We optimize this operation by using an inline function for
1243 * everything which isn't a surrogate pair or an endian indicator.
1244 */
1245DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1246{
1247 const RTUTF16 wc = *pwsz;
1248 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1249 return wc;
1250 return RTUtf16GetCpInternal(pwsz);
1251}
1252
1253/**
1254 * Get the unicode code point at the given string position.
1255 *
1256 * @returns iprt status code.
1257 * @param ppwsz Pointer to the string pointer. This will be updated to
1258 * point to the char following the current code point.
1259 * @param pCp Where to store the code point.
1260 * RTUNICP_INVALID is stored here on failure.
1261 *
1262 * @remark We optimize this operation by using an inline function for
1263 * everything which isn't a surrogate pair or and endian indicator.
1264 */
1265DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1266{
1267 const RTUTF16 wc = **ppwsz;
1268 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1269 {
1270 (*ppwsz)++;
1271 *pCp = wc;
1272 return VINF_SUCCESS;
1273 }
1274 return RTUtf16GetCpExInternal(ppwsz, pCp);
1275}
1276
1277/**
1278 * Put the unicode code point at the given string position
1279 * and return the pointer to the char following it.
1280 *
1281 * This function will not consider anything at or following the the
1282 * buffer area pointed to by pwsz. It is therefore not suitable for
1283 * inserting code points into a string, only appending/overwriting.
1284 *
1285 * @returns pointer to the char following the written code point.
1286 * @param pwsz The string.
1287 * @param CodePoint The code point to write.
1288 * This sould not be RTUNICP_INVALID or any other charater
1289 * out of the UTF-16 range.
1290 *
1291 * @remark We optimize this operation by using an inline function for
1292 * everything which isn't a surrogate pair or and endian indicator.
1293 */
1294DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1295{
1296 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
1297 {
1298 *pwsz++ = (RTUTF16)CodePoint;
1299 return pwsz;
1300 }
1301 return RTUtf16PutCpInternal(pwsz, CodePoint);
1302}
1303
1304/**
1305 * Skips ahead, past the current code point.
1306 *
1307 * @returns Pointer to the char after the current code point.
1308 * @param pwsz Pointer to the current code point.
1309 * @remark This will not move the next valid code point, only past the current one.
1310 */
1311DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1312{
1313 RTUNICP Cp;
1314 RTUtf16GetCpEx(&pwsz, &Cp);
1315 return (PRTUTF16)pwsz;
1316}
1317
1318/**
1319 * Skips backwards, to the previous code point.
1320 *
1321 * @returns Pointer to the previous code point.
1322 * @param pwszStart Pointer to the start of the string.
1323 * @param pwsz Pointer to the current code point.
1324 */
1325RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1326
1327
1328/**
1329 * Checks if the UTF-16 char is the high surrogate char (i.e.
1330 * the 1st char in the pair).
1331 *
1332 * @returns true if it is.
1333 * @returns false if it isn't.
1334 * @param wc The character to investigate.
1335 */
1336DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1337{
1338 return wc >= 0xd800 && wc <= 0xdbff;
1339}
1340
1341/**
1342 * Checks if the UTF-16 char is the low surrogate char (i.e.
1343 * the 2nd char in the pair).
1344 *
1345 * @returns true if it is.
1346 * @returns false if it isn't.
1347 * @param wc The character to investigate.
1348 */
1349DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1350{
1351 return wc >= 0xdc00 && wc <= 0xdfff;
1352}
1353
1354
1355/**
1356 * Checks if the two UTF-16 chars form a valid surrogate pair.
1357 *
1358 * @returns true if they do.
1359 * @returns false if they doesn't.
1360 * @param wcHigh The high (1st) character.
1361 * @param wcLow The low (2nd) character.
1362 */
1363DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1364{
1365 return RTUtf16IsHighSurrogate(wcHigh)
1366 && RTUtf16IsLowSurrogate(wcLow);
1367}
1368
1369/** @} */
1370
1371__END_DECLS
1372
1373/** @} */
1374
1375#endif
1376
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette