VirtualBox

source: vbox/trunk/include/iprt/string.h@ 6981

Last change on this file since 6981 was 6041, checked in by vboxsync, 17 years ago

Added RTUtf16CalcUtf8Len and RTUtf16CalcUtf8LenEx.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 58.7 KB
Line 
1/** @file
2 * innotek Portable Runtime - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_string_h
27#define ___iprt_string_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/stdarg.h>
32#include <iprt/err.h> /* for VINF_SUCCESS */
33#if defined(RT_OS_LINUX) && defined(__KERNEL__)
34# include <linux/string.h>
35#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
36 /*
37 * Kludge for the FreeBSD kernel:
38 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
39 * it up while including string.h to keep things quiet. It's nothing
40 * important that's clashing, after all.
41 */
42# define strdup strdup_string_h
43# include <string.h>
44# undef strdup
45#elif defined(RT_OS_SOLARIS) && defined(_KERNEL)
46 /*
47 * Same case as with FreeBSD kernel:
48 * The string.h stuff clashes with sys/systm.h
49 * ffs = find first set bit.
50 */
51# define ffs ffs_string_h
52# include <string.h>
53# undef ffs
54# undef strpbrk
55#else
56# include <string.h>
57#endif
58
59/*
60 * Supply prototypes for standard string functions provided by
61 * IPRT instead of the operating environment.
62 */
63#if defined(RT_OS_DARWIN) && defined(KERNEL)
64__BEGIN_DECLS
65void *memchr(const void *pv, int ch, size_t cb);
66char *strpbrk(const char *pszStr, const char *pszChars);
67__END_DECLS
68#endif
69
70
71/** @defgroup grp_rt_str RTStr - String Manipulation
72 * Mostly UTF-8 related helpers where the standard string functions won't do.
73 * @ingroup grp_rt
74 * @{
75 */
76
77__BEGIN_DECLS
78
79
80/**
81 * The maximum string length.
82 */
83#define RTSTR_MAX (~(size_t)0)
84
85
86#ifdef IN_RING3
87
88/**
89 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
90 *
91 * @returns iprt status code.
92 * @param ppszString Receives pointer of allocated native CP string.
93 * The returned pointer must be freed using RTStrFree().
94 * @param pszString UTF-8 string to convert.
95 */
96RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
97
98/**
99 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
100 *
101 * @returns iprt status code.
102 * @param ppszString Receives pointer of allocated UTF-8 string.
103 * The returned pointer must be freed using RTStrFree().
104 * @param pszString Native string to convert.
105 */
106RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
107
108#endif
109
110/**
111 * Free string allocated by any of the non-UCS-2 string functions.
112 *
113 * @returns Nothing (void function).
114 * @param pszString Pointer to buffer with string to free.
115 * NULL is accepted.
116 */
117RTDECL(void) RTStrFree(char *pszString);
118
119/**
120 * Allocates a new copy of the given UTF-8 string.
121 *
122 * @returns Pointer to the allocated UTF-8 string.
123 * @param pszString UTF-8 string to duplicate.
124 */
125RTDECL(char *) RTStrDup(const char *pszString);
126
127/**
128 * Allocates a new copy of the given UTF-8 string.
129 *
130 * @returns iprt status code.
131 * @param ppszString Receives pointer of the allocated UTF-8 string.
132 * The returned pointer must be freed using RTStrFree().
133 * @param pszString UTF-8 string to duplicate.
134 */
135RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
136
137/**
138 * Gets the number of code points the string is made up of, excluding
139 * the terminator.
140 *
141 *
142 * @returns Number of code points (RTUNICP).
143 * @returns 0 if the string was incorrectly encoded.
144 * @param psz The string.
145 */
146RTDECL(size_t) RTStrUniLen(const char *psz);
147
148/**
149 * Gets the number of code points the string is made up of, excluding
150 * the terminator.
151 *
152 * This function will validate the string, and incorrectly encoded UTF-8
153 * strings will be rejected.
154 *
155 * @returns iprt status code.
156 * @param psz The string.
157 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
158 * @param pcuc Where to store the code point count.
159 * This is undefined on failure.
160 */
161RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
162
163/**
164 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
165 *
166 * @returns iprt status code.
167 * @param pszString UTF-8 string to convert.
168 * @param ppUniString Receives pointer to the allocated unicode string.
169 * The returned string must be freed using RTUniFree().
170 */
171RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
172
173/**
174 * Translates pszString from UTF-8 to an array of code points, allocating the result
175 * array if requested.
176 *
177 * @returns iprt status code.
178 * @param pszString UTF-8 string to convert.
179 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
180 * when it reaches cchString or the string terminator ('\\0').
181 * Use RTSTR_MAX to translate the entire string.
182 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
183 * a buffer of the specified size, or pointer to a NULL pointer.
184 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
185 * will be allocated to hold the translated string.
186 * If a buffer was requested it must be freed using RTUniFree().
187 * @param cCps The number of code points in the unicode string. This includes the terminator.
188 * @param pcCps Where to store the length of the translated string. (Optional)
189 * This field will be updated even on failure, however the value is only
190 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
191 * and VERR_NO_STR_MEMORY it contains the required buffer space.
192 */
193RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
194
195/**
196 * Calculates the length of the string in RTUTF16 items.
197 *
198 * This function will validate the string, and incorrectly encoded UTF-8
199 * strings will be rejected. The primary purpose of this function is to
200 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
201 * other purposes RTStrCalcUtf16LenEx() should be used.
202 *
203 * @returns Number of RTUTF16 items.
204 * @returns 0 if the string was incorrectly encoded.
205 * @param psz The string.
206 */
207RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
208
209/**
210 * Calculates the length of the string in RTUTF16 items.
211 *
212 * This function will validate the string, and incorrectly encoded UTF-8
213 * strings will be rejected.
214 *
215 * @returns iprt status code.
216 * @param psz The string.
217 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
218 * @param pcwc Where to store the string length. Optional.
219 * This is undefined on failure.
220 */
221RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
222
223/**
224 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
225 *
226 * @returns iprt status code.
227 * @param pszString UTF-8 string to convert.
228 * @param ppwszString Receives pointer to the allocated UTF-16 string.
229 * The returned string must be freed using RTUtf16Free().
230 */
231RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
232
233/**
234 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
235 *
236 * @returns iprt status code.
237 * @param pszString UTF-8 string to convert.
238 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
239 * when it reaches cchString or the string terminator ('\\0').
240 * Use RTSTR_MAX to translate the entire string.
241 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
242 * a buffer of the specified size, or pointer to a NULL pointer.
243 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
244 * will be allocated to hold the translated string.
245 * If a buffer was requested it must be freed using RTUtf16Free().
246 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
247 * @param pcwc Where to store the length of the translated string. (Optional)
248 * This field will be updated even on failure, however the value is only
249 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
250 * and VERR_NO_STR_MEMORY it contains the required buffer space.
251 */
252RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
253
254/**
255 * Allocates tmp buffer, translates pszString from UTF8 to UCS-2.
256 *
257 * @returns iprt status code.
258 * @param ppwszString Receives pointer of allocated UCS-2 string.
259 * The returned pointer must be freed using RTStrUcs2Free().
260 * @param pszString UTF-8 string to convert.
261 * @deprecated Use RTStrToUtf16().
262 */
263DECLINLINE(int) RTStrUtf8ToUcs2(PRTUCS2 *ppwszString, const char *pszString)
264{
265 return RTStrToUtf16(pszString, ppwszString);
266}
267
268/**
269 * Translates pszString from UTF8 to backwater UCS-2, can allocate a temp buffer.
270 *
271 * @returns iprt status code.
272 * @param ppwszString Receives pointer of allocated UCS-2 string.
273 * The returned pointer must be freed using RTStrUcs2Free().
274 * @param cwc Length of target buffer in RTUCS2s including the trailing '\\0'.
275 * If 0 a temporary buffer is allocated.
276 * @param pszString UTF-8 string to convert.
277 * @deprecated Use RTStrToUtf16Ex().
278 */
279DECLINLINE(int) RTStrUtf8ToUcs2Ex(PRTUCS2 *ppwszString, unsigned cwc, const char *pszString)
280{
281 return RTStrToUtf16Ex(pszString, RTSTR_MAX, ppwszString, cwc, NULL);
282}
283
284
285/**
286 * Get the unicode code point at the given string position.
287 *
288 * @returns unicode code point.
289 * @returns RTUNICP_INVALID if the encoding is invalid.
290 * @param psz The string.
291 */
292RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
293
294/**
295 * Get the unicode code point at the given string position.
296 *
297 * @returns iprt status code (the function returns int; the code point itself
298 * is passed back through pCp).
299 * @param ppsz Pointer to the string pointer.
300 * @param pCp Where to store the unicode code point.
301 */
302RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
303
304/**
305 * Put the unicode code point at the given string position
306 * and return the pointer to the char following it.
307 *
308 * This function will not consider anything at or following the
309 * buffer area pointed to by psz. It is therefore not suitable for
310 * inserting code points into a string, only appending/overwriting.
311 *
312 * @returns pointer to the char following the written code point.
313 * @param psz The string.
314 * @param CodePoint The code point to write.
315 * This should not be RTUNICP_INVALID or any other character
316 * out of the UTF-8 range.
317 *
318 * @remark This is a worker function for RTStrPutCp().
319 *
320 */
321RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
322
323/**
324 * Get the unicode code point at the given string position.
325 *
326 * @returns unicode code point.
327 * @returns RTUNICP_INVALID if the encoding is invalid.
328 * @param psz The string.
329 *
330 * @remark We optimize this operation by using an inline function for
331 * the most frequent and simplest sequence, the rest is
332 * handled by RTStrGetCpInternal().
333 */
334DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
335{
336 const unsigned char uch = *(const unsigned char *)psz;
337 if (!(uch & RT_BIT(7)))
338 return uch;
339 return RTStrGetCpInternal(psz);
340}
341
342/**
343 * Get the unicode code point at the given string position.
344 *
345 * @returns iprt status code.
346 * @param ppsz Pointer to the string pointer. This will be updated to
347 * point to the char following the current code point.
348 * @param pCp Where to store the code point.
349 * RTUNICP_INVALID is stored here on failure.
350 *
351 * @remark We optimize this operation by using an inline function for
352 * the most frequent and simplest sequence, the rest is
353 * handled by RTStrGetCpExInternal().
354 */
355DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
356{
357 const unsigned char uch = **(const unsigned char **)ppsz;
358 if (!(uch & RT_BIT(7)))
359 {
360 (*ppsz)++;
361 *pCp = uch;
362 return VINF_SUCCESS;
363 }
364 return RTStrGetCpExInternal(ppsz, pCp);
365}
366
367/**
368 * Put the unicode code point at the given string position
369 * and return the pointer to the char following it.
370 *
371 * This function will not consider anything at or following the the
372 * buffer area pointed to by psz. It is therefore not suitable for
373 * inserting code points into a string, only appending/overwriting.
374 *
375 * @returns pointer to the char following the written code point.
376 * @param psz The string.
377 * @param CodePoint The code point to write.
378 * This sould not be RTUNICP_INVALID or any other charater
379 * out of the UTF-8 range.
380 *
381 * @remark We optimize this operation by using an inline function for
382 * the most frequent and simplest sequence, the rest is
383 * handled by RTStrPutCpInternal().
384 */
385DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
386{
387 if (CodePoint < 0x80)
388 {
389 *psz++ = (unsigned char)CodePoint;
390 return psz;
391 }
392 return RTStrPutCpInternal(psz, CodePoint);
393}
394
395/**
396 * Skips ahead, past the current code point.
397 *
398 * @returns Pointer to the char after the current code point.
399 * @param psz Pointer to the current code point.
400 * @remark This will not move the next valid code point, only past the current one.
401 */
402DECLINLINE(char *) RTStrNextCp(const char *psz)
403{
404 RTUNICP Cp;
405 RTStrGetCpEx(&psz, &Cp);
406 return (char *)psz;
407}
408
409/**
410 * Skips back to the previous code point.
411 *
412 * @returns Pointer to the char before the current code point.
413 * @returns pszStart on failure.
414 * @param pszStart Pointer to the start of the string.
415 * @param psz Pointer to the current code point.
416 */
417RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
418
419
420
421#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
422#define DECLARED_FNRTSTROUTPUT
423/**
424 * Output callback.
425 *
426 * @returns number of bytes written.
427 * @param pvArg User argument.
428 * @param pachChars Pointer to an array of utf-8 characters.
429 * @param cbChars Number of bytes in the character array pointed to by pachChars.
430 */
431typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
432/** Pointer to callback function. */
433typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
434#endif
435
/** Format flag.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags makes sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/* RTSTR_F_BIT_MASK is the OR of the five RTSTR_F_xxBIT flags that follow. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag (RTSTR_F_8BIT, RTSTR_F_16BIT, RTSTR_F_32BIT,
 * RTSTR_F_64BIT or RTSTR_F_128BIT) matching the width of the specified type.
 *
 * sizeof() yields the size in bytes, so it is scaled by 8 before being
 * compared against the bit widths (this assumes 8-bit bytes). Types of any
 * other width evaluate to 0.
 *
 * @param   type    The type (or expression) to get the flag for.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
468
469
470/**
471 * Callback to format non-standard format specifiers.
472 *
473 * @returns The number of bytes formatted.
474 * @param pvArg Formatter argument.
475 * @param pfnOutput Pointer to output function.
476 * @param pvArgOutput Argument for the output function.
477 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
478 * after the format specifier.
479 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
480 * @param cchWidth Format Width. -1 if not specified.
481 * @param cchPrecision Format Precision. -1 if not specified.
482 * @param fFlags Flags (RTSTR_F_*).
483 * @param chArgSize The argument size specifier, 'l' or 'L'.
484 */
485typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
486 const char **ppszFormat, va_list *pArgs, int cchWidth,
487 int cchPrecision, unsigned fFlags, char chArgSize);
488/** Pointer to a FNSTRFORMAT() function. */
489typedef FNSTRFORMAT *PFNSTRFORMAT;
490
491
492/**
493 * Partial implementation of a printf like formatter.
494 * It doesn't do everything correct, and there is no floating point support.
495 * However, it supports custom formats by the means of a format callback.
496 *
497 * @returns number of bytes formatted.
498 * @param pfnOutput Output worker.
499 * Called in two ways. Normally with a string and its length.
500 * For termination, it's called with NULL for string, 0 for length.
501 * @param pvArgOutput Argument to the output worker.
502 * @param pfnFormat Custom format worker.
503 * @param pvArgFormat Argument to the format worker.
504 * @param pszFormat Format string pointer.
505 * @param args Argument list.
506 */
507RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list args);
508
509/**
510 * Partial implementation of a printf like formatter.
511 * It doesn't do everything correct, and there is no floating point support.
512 * However, it supports custom formats by the means of a format callback.
513 *
514 * @returns number of bytes formatted.
515 * @param pfnOutput Output worker.
516 * Called in two ways. Normally with a string and its length.
517 * For termination, it's called with NULL for string, 0 for length.
518 * @param pvArgOutput Argument to the output worker.
519 * @param pfnFormat Custom format worker.
520 * @param pvArgFormat Argument to the format worker.
521 * @param pszFormat Format string.
522 * @param ... Argument list.
523 */
524RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
525
526/**
527 * Formats an integer number according to the parameters.
528 *
529 * @returns Length of the formatted number.
530 * @param psz Pointer to output string buffer of sufficient size.
531 * @param u64Value Value to format.
532 * @param uiBase Number representation base.
533 * @param cchWidth Width.
534 * @param cchPrecision Precision.
535 * @param fFlags Flags (RTSTR_F_*).
536 */
537RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
538
539/**
540 * String printf.
541 *
542 * @returns The length of the returned string (in pszBuffer).
543 * @param pszBuffer Output buffer.
544 * @param cchBuffer Size of the output buffer.
545 * @param pszFormat The format string.
546 * @param args The format argument.
547 */
548RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
549
550/**
551 * String printf.
552 *
553 * @returns The length of the returned string (in pszBuffer).
554 * @param pszBuffer Output buffer.
555 * @param cchBuffer Size of the output buffer.
556 * @param pszFormat The format string.
557 * @param ... The format argument.
558 */
559RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
560
561
562/**
563 * String printf with custom formatting.
564 *
565 * @returns The length of the returned string (in pszBuffer).
566 * @param pfnFormat Pointer to handler function for the custom formats.
567 * @param pvArg Argument to the pfnFormat function.
568 * @param pszBuffer Output buffer.
569 * @param cchBuffer Size of the output buffer.
570 * @param pszFormat The format string.
571 * @param args The format argument.
572 */
573RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
574
575/**
576 * String printf with custom formatting.
577 *
578 * @returns The length of the returned string (in pszBuffer).
579 * @param pfnFormat Pointer to handler function for the custom formats.
580 * @param pvArg Argument to the pfnFormat function.
581 * @param pszBuffer Output buffer.
582 * @param cchBuffer Size of the output buffer.
583 * @param pszFormat The format string.
584 * @param ... The format argument.
585 */
586RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
587
588
589/**
590 * Allocating string printf.
591 *
592 * @returns The length of the string in the returned *ppszBuffer.
593 * @returns -1 on failure.
594 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
595 * The buffer should be freed using RTStrFree().
596 * On failure *ppszBuffer will be set to NULL.
597 * @param pszFormat The format string.
598 * @param args The format argument.
599 */
600RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
601
602/**
603 * Allocating string printf.
604 *
605 * @returns The length of the string in the returned *ppszBuffer.
606 * @returns -1 on failure.
607 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
608 * The buffer should be freed using RTStrFree().
609 * On failure *ppszBuffer will be set to NULL.
610 * @param pszFormat The format string.
611 * @param ... The format argument.
612 */
613RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
614
615
616/**
617 * Strips blankspaces from both ends of the string.
618 *
619 * @returns Pointer to first non-blank char in the string.
620 * @param psz The string to strip.
621 */
622RTDECL(char *) RTStrStrip(char *psz);
623
624/**
625 * Strips blankspaces from the start of the string.
626 *
627 * @returns Pointer to first non-blank char in the string.
628 * @param psz The string to strip.
629 */
630RTDECL(char *) RTStrStripL(const char *psz);
631
632/**
633 * Strips blankspaces from the end of the string.
634 *
635 * @returns psz.
636 * @param psz The string to strip.
637 */
638RTDECL(char *) RTStrStripR(char *psz);
639
640
641/** @defgroup rt_str_conv String To/From Number Conversions
642 * @ingroup grp_rt_str
643 * @{ */
644
645/**
646 * Converts a string representation of a number to a 64-bit unsigned number.
647 *
648 * @returns iprt status code.
649 * Warnings are used to indicate conversion problems.
650 * @retval VWRN_NUMBER_TOO_BIG
651 * @retval VWRN_NEGATIVE_UNSIGNED
652 * @retval VWRN_TRAILING_CHARS
653 * @retval VWRN_TRAILING_SPACES
654 * @retval VINF_SUCCESS
655 * @retval VERR_NO_DIGITS
656 *
657 * @param pszValue Pointer to the string value.
658 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
659 * @param uBase The base of the representation used.
660 * If 0 the function will look for known prefixes before defaulting to 10.
661 * @param pu64 Where to store the converted number. (optional)
662 */
663RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
664
665/**
666 * Converts a string representation of a number to a 64-bit unsigned number,
667 * making sure the full string is converted.
668 *
669 * @returns iprt status code.
670 * Warnings are used to indicate conversion problems.
671 * @retval VWRN_NUMBER_TOO_BIG
672 * @retval VWRN_NEGATIVE_UNSIGNED
673 * @retval VINF_SUCCESS
674 * @retval VERR_NO_DIGITS
675 * @retval VERR_TRAILING_SPACES
676 * @retval VERR_TRAILING_CHARS
677 *
678 * @param pszValue Pointer to the string value.
679 * @param uBase The base of the representation used.
680 * If 0 the function will look for known prefixes before defaulting to 10.
681 * @param pu64 Where to store the converted number. (optional)
682 */
683RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64);
684
685/**
686 * Converts a string representation of a number to a 64-bit unsigned number.
687 * The base is guessed.
688 *
689 * @returns 64-bit unsigned number on success.
690 * @returns 0 on failure.
691 * @param pszValue Pointer to the string value.
692 */
693RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
694
695/**
696 * Converts a string representation of a number to a 32-bit unsigned number.
697 *
698 * @returns iprt status code.
699 * Warnings are used to indicate conversion problems.
700 * @retval VWRN_NUMBER_TOO_BIG
701 * @retval VWRN_NEGATIVE_UNSIGNED
702 * @retval VWRN_TRAILING_CHARS
703 * @retval VWRN_TRAILING_SPACES
704 * @retval VINF_SUCCESS
705 * @retval VERR_NO_DIGITS
706 *
707 * @param pszValue Pointer to the string value.
708 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
709 * @param uBase The base of the representation used.
710 * If 0 the function will look for known prefixes before defaulting to 10.
711 * @param pu32 Where to store the converted number. (optional)
712 */
713RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
714
715/**
716 * Converts a string representation of a number to a 32-bit unsigned number,
717 * making sure the full string is converted.
718 *
719 * @returns iprt status code.
720 * Warnings are used to indicate conversion problems.
721 * @retval VWRN_NUMBER_TOO_BIG
722 * @retval VWRN_NEGATIVE_UNSIGNED
723 * @retval VINF_SUCCESS
724 * @retval VERR_NO_DIGITS
725 * @retval VERR_TRAILING_SPACES
726 * @retval VERR_TRAILING_CHARS
727 *
728 * @param pszValue Pointer to the string value.
729 * @param uBase The base of the representation used.
730 * If 0 the function will look for known prefixes before defaulting to 10.
731 * @param pu32 Where to store the converted number. (optional)
732 */
733RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32);
734
735/**
736 * Converts a string representation of a number to a 32-bit unsigned number.
737 * The base is guessed.
738 *
739 * @returns 32-bit unsigned number on success.
740 * @returns 0 on failure.
741 * @param pszValue Pointer to the string value.
742 */
743RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
744
745/**
746 * Converts a string representation of a number to a 16-bit unsigned number.
747 *
748 * @returns iprt status code.
749 * Warnings are used to indicate conversion problems.
750 * @retval VWRN_NUMBER_TOO_BIG
751 * @retval VWRN_NEGATIVE_UNSIGNED
752 * @retval VWRN_TRAILING_CHARS
753 * @retval VWRN_TRAILING_SPACES
754 * @retval VINF_SUCCESS
755 * @retval VERR_NO_DIGITS
756 *
757 * @param pszValue Pointer to the string value.
758 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
759 * @param uBase The base of the representation used.
760 * If 0 the function will look for known prefixes before defaulting to 10.
761 * @param pu16 Where to store the converted number. (optional)
762 */
763RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
764
765/**
766 * Converts a string representation of a number to a 16-bit unsigned number,
767 * making sure the full string is converted.
768 *
769 * @returns iprt status code.
770 * Warnings are used to indicate conversion problems.
771 * @retval VWRN_NUMBER_TOO_BIG
772 * @retval VWRN_NEGATIVE_UNSIGNED
773 * @retval VINF_SUCCESS
774 * @retval VERR_NO_DIGITS
775 * @retval VERR_TRAILING_SPACES
776 * @retval VERR_TRAILING_CHARS
777 *
778 * @param pszValue Pointer to the string value.
779 * @param uBase The base of the representation used.
780 * If 0 the function will look for known prefixes before defaulting to 10.
781 * @param pu16 Where to store the converted number. (optional)
782 */
783RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16);
784
785/**
786 * Converts a string representation of a number to a 16-bit unsigned number.
787 * The base is guessed.
788 *
789 * @returns 16-bit unsigned number on success.
790 * @returns 0 on failure.
791 * @param pszValue Pointer to the string value.
792 */
793RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
794
795/**
796 * Converts a string representation of a number to a 8-bit unsigned number.
797 *
798 * @returns iprt status code.
799 * Warnings are used to indicate conversion problems.
800 * @retval VWRN_NUMBER_TOO_BIG
801 * @retval VWRN_NEGATIVE_UNSIGNED
802 * @retval VWRN_TRAILING_CHARS
803 * @retval VWRN_TRAILING_SPACES
804 * @retval VINF_SUCCESS
805 * @retval VERR_NO_DIGITS
806 *
807 * @param pszValue Pointer to the string value.
808 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
809 * @param uBase The base of the representation used.
810 * If 0 the function will look for known prefixes before defaulting to 10.
811 * @param pu8 Where to store the converted number. (optional)
812 */
813RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
814
815/**
816 * Converts a string representation of a number to a 8-bit unsigned number,
817 * making sure the full string is converted.
818 *
819 * @returns iprt status code.
820 * Warnings are used to indicate conversion problems.
821 * @retval VWRN_NUMBER_TOO_BIG
822 * @retval VWRN_NEGATIVE_UNSIGNED
823 * @retval VINF_SUCCESS
824 * @retval VERR_NO_DIGITS
825 * @retval VERR_TRAILING_SPACES
826 * @retval VERR_TRAILING_CHARS
827 *
828 * @param pszValue Pointer to the string value.
829 * @param uBase The base of the representation used.
830 * If 0 the function will look for known prefixes before defaulting to 10.
831 * @param pu8 Where to store the converted number. (optional)
832 */
833RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8);
834
835/**
836 * Converts a string representation of a number to a 8-bit unsigned number.
837 * The base is guessed.
838 *
839 * @returns 8-bit unsigned number on success.
840 * @returns 0 on failure.
841 * @param pszValue Pointer to the string value.
842 */
843RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
844
845/**
846 * Converts a string representation of a number to a 64-bit signed number.
847 *
848 * @returns iprt status code.
849 * Warnings are used to indicate conversion problems.
850 * @retval VWRN_NUMBER_TOO_BIG
851 * @retval VWRN_TRAILING_CHARS
852 * @retval VWRN_TRAILING_SPACES
853 * @retval VINF_SUCCESS
854 * @retval VERR_NO_DIGITS
855 *
856 * @param pszValue Pointer to the string value.
857 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
858 * @param uBase The base of the representation used.
859 * If 0 the function will look for known prefixes before defaulting to 10.
860 * @param pi64 Where to store the converted number. (optional)
861 */
862RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
863
864/**
865 * Converts a string representation of a number to a 64-bit signed number,
866 * making sure the full string is converted.
867 *
868 * @returns iprt status code.
869 * Warnings are used to indicate conversion problems.
870 * @retval VWRN_NUMBER_TOO_BIG
871 * @retval VINF_SUCCESS
872 * @retval VERR_TRAILING_CHARS
873 * @retval VERR_TRAILING_SPACES
874 * @retval VERR_NO_DIGITS
875 *
876 * @param pszValue Pointer to the string value.
877 * @param uBase The base of the representation used.
878 * If 0 the function will look for known prefixes before defaulting to 10.
879 * @param pi64 Where to store the converted number. (optional)
880 */
881RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64);
882
883/**
884 * Converts a string representation of a number to a 64-bit signed number.
885 * The base is guessed.
886 *
887 * @returns 64-bit signed number on success.
888 * @returns 0 on failure.
889 * @param pszValue Pointer to the string value.
890 */
891RTDECL(int64_t) RTStrToInt64(const char *pszValue);
892
893/**
894 * Converts a string representation of a number to a 32-bit signed number.
895 *
896 * @returns iprt status code.
897 * Warnings are used to indicate conversion problems.
898 * @retval VWRN_NUMBER_TOO_BIG
899 * @retval VWRN_TRAILING_CHARS
900 * @retval VWRN_TRAILING_SPACES
901 * @retval VINF_SUCCESS
902 * @retval VERR_NO_DIGITS
903 *
904 * @param pszValue Pointer to the string value.
905 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
906 * @param uBase The base of the representation used.
907 * If 0 the function will look for known prefixes before defaulting to 10.
908 * @param pi32 Where to store the converted number. (optional)
909 */
910RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
911
912/**
913 * Converts a string representation of a number to a 32-bit signed number,
914 * making sure the full string is converted.
915 *
916 * @returns iprt status code.
917 * Warnings are used to indicate conversion problems.
918 * @retval VWRN_NUMBER_TOO_BIG
919 * @retval VINF_SUCCESS
920 * @retval VERR_TRAILING_CHARS
921 * @retval VERR_TRAILING_SPACES
922 * @retval VERR_NO_DIGITS
923 *
924 * @param pszValue Pointer to the string value.
925 * @param uBase The base of the representation used.
926 * If 0 the function will look for known prefixes before defaulting to 10.
927 * @param pi32 Where to store the converted number. (optional)
928 */
929RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32);
930
931/**
932 * Converts a string representation of a number to a 32-bit signed number.
933 * The base is guessed.
934 *
935 * @returns 32-bit signed number on success.
936 * @returns 0 on failure.
937 * @param pszValue Pointer to the string value.
938 */
939RTDECL(int32_t) RTStrToInt32(const char *pszValue);
940
941/**
942 * Converts a string representation of a number to a 16-bit signed number.
943 *
944 * @returns iprt status code.
945 * Warnings are used to indicate conversion problems.
946 * @retval VWRN_NUMBER_TOO_BIG
947 * @retval VWRN_TRAILING_CHARS
948 * @retval VWRN_TRAILING_SPACES
949 * @retval VINF_SUCCESS
950 * @retval VERR_NO_DIGITS
951 *
952 * @param pszValue Pointer to the string value.
953 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
954 * @param uBase The base of the representation used.
955 * If 0 the function will look for known prefixes before defaulting to 10.
956 * @param pi16 Where to store the converted number. (optional)
957 */
958RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
959
960/**
961 * Converts a string representation of a number to a 16-bit signed number,
962 * making sure the full string is converted.
963 *
964 * @returns iprt status code.
965 * Warnings are used to indicate conversion problems.
966 * @retval VWRN_NUMBER_TOO_BIG
967 * @retval VINF_SUCCESS
968 * @retval VERR_TRAILING_CHARS
969 * @retval VERR_TRAILING_SPACES
970 * @retval VERR_NO_DIGITS
971 *
972 * @param pszValue Pointer to the string value.
973 * @param uBase The base of the representation used.
974 * If 0 the function will look for known prefixes before defaulting to 10.
975 * @param pi16 Where to store the converted number. (optional)
976 */
977RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16);
978
979/**
980 * Converts a string representation of a number to a 16-bit signed number.
981 * The base is guessed.
982 *
983 * @returns 16-bit signed number on success.
984 * @returns 0 on failure.
985 * @param pszValue Pointer to the string value.
986 */
987RTDECL(int16_t) RTStrToInt16(const char *pszValue);
988
989/**
990 * Converts a string representation of a number to a 8-bit signed number.
991 *
992 * @returns iprt status code.
993 * Warnings are used to indicate conversion problems.
994 * @retval VWRN_NUMBER_TOO_BIG
995 * @retval VWRN_TRAILING_CHARS
996 * @retval VWRN_TRAILING_SPACES
997 * @retval VINF_SUCCESS
998 * @retval VERR_NO_DIGITS
999 *
1000 * @param pszValue Pointer to the string value.
1001 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
1002 * @param uBase The base of the representation used.
1003 * If 0 the function will look for known prefixes before defaulting to 10.
1004 * @param pi8 Where to store the converted number. (optional)
1005 */
1006RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
1007
1008/**
1009 * Converts a string representation of a number to a 8-bit signed number,
1010 * making sure the full string is converted.
1011 *
1012 * @returns iprt status code.
1013 * Warnings are used to indicate conversion problems.
1014 * @retval VWRN_NUMBER_TOO_BIG
1015 * @retval VINF_SUCCESS
1016 * @retval VERR_TRAILING_CHARS
1017 * @retval VERR_TRAILING_SPACES
1018 * @retval VERR_NO_DIGITS
1019 *
1020 * @param pszValue Pointer to the string value.
1021 * @param uBase The base of the representation used.
1022 * If 0 the function will look for known prefixes before defaulting to 10.
1023 * @param pi8 Where to store the converted number. (optional)
1024 */
1025RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8);
1026
1027/**
1028 * Converts a string representation of a number to a 8-bit signed number.
1029 * The base is guessed.
1030 *
1031 * @returns 8-bit signed number on success.
1032 * @returns 0 on failure.
1033 * @param pszValue Pointer to the string value.
1034 */
1035RTDECL(int8_t) RTStrToInt8(const char *pszValue);
1036
1037/**
1038 * Performs a case insensitive string compare between two UTF-8 strings.
1039 *
1040 * This is a simplified compare, as only the simplified lower/upper case folding
1041 * specified by the unicode specs are used. It does not consider character pairs
1042 * as they are used in some languages, just simple upper & lower case compares.
1043 *
1044 * @returns < 0 if the first string less than the second string.
1045 * @returns 0 if the first string identical to the second string.
1046 * @returns > 0 if the first string greater than the second string.
1047 * @param psz1 First UTF-8 string.
1048 * @param psz2 Second UTF-8 string.
1049 */
1050RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
1051
1052/** @} */
1053
1054
1055/** @defgroup rt_str_space Unique String Space
1056 * @ingroup grp_rt_str
1057 * @{
1058 */
1059
1060/** Pointer to a string name space container node core. */
1061typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
1062/** Pointer to a pointer to a string name space container node core. */
1063typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
1064
1065/**
1066 * String name space container node core.
1067 */
1068typedef struct RTSTRSPACECORE
1069{
1070 /** Hash key. Don't touch. */
1071 uint32_t Key;
1072 /** Pointer to the left leaf node. Don't touch. */
1073 PRTSTRSPACECORE pLeft;
1074 /** Pointer to the left rigth node. Don't touch. */
1075 PRTSTRSPACECORE pRight;
1076 /** Pointer to the list of string with the same key. Don't touch. */
1077 PRTSTRSPACECORE pList;
1078 /** Height of this tree: max(heigth(left), heigth(right)) + 1. Don't touch */
1079 unsigned char uchHeight;
1080 /** The string length. Read only! */
1081 size_t cchString;
1082 /** Pointer to the string. Read only! */
1083 const char * pszString;
1084} RTSTRSPACECORE;
1085
1086/** String space. (Initialize with NULL.) */
1087typedef PRTSTRSPACECORE RTSTRSPACE;
1088/** Pointer to a string space. */
1089typedef PPRTSTRSPACECORE PRTSTRSPACE;
1090
1091
1092/**
1093 * Inserts a string into a unique string space.
1094 *
1095 * @returns true on success.
1096 * @returns false if the string collided with an existing string.
1097 * @param pStrSpace The space to insert it into.
1098 * @param pStr The string node.
1099 */
1100RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
1101
1102/**
1103 * Removes a string from a unique string space.
1104 *
1105 * @returns Pointer to the removed string node.
1106 * @returns NULL if the string was not found in the string space.
1107 * @param pStrSpace The space to remove it from.
1108 * @param pszString The string to remove.
1109 */
1110RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
1111
1112/**
1113 * Gets a string from a unique string space.
1114 *
1115 * @returns Pointer to the string node.
1116 * @returns NULL if the string was not found in the string space.
1117 * @param pStrSpace The space to search.
1118 * @param pszString The string to get.
1119 */
1120RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
1121
1122/**
1123 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
1124 *
1125 * @returns 0 on continue.
1126 * @returns Non-zero to abort the operation.
1127 * @param pStr The string node
1128 * @param pvUser The user specified argument.
1129 */
1130typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
1131/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
1132typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
1133
1134/**
1135 * Destroys the string space.
1136 * The caller supplies a callback which will be called for each of
1137 * the string nodes for freeing their memory and other resources.
1138 *
1139 * @returns 0 or what ever non-zero return value pfnCallback returned
1140 * when aborting the destruction.
1141 * @param pStrSpace The space to destroy.
1142 * @param pfnCallback The callback.
1143 * @param pvUser The user argument.
1144 */
1145RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1146
1147/**
1148 * Enumerates the string space.
1149 * The caller supplies a callback which will be called for each of
1150 * the string nodes.
1151 *
1152 * @returns 0 or what ever non-zero return value pfnCallback returned
1153 * when aborting the enumeration.
1154 * @param pStrSpace The space to enumerate.
1155 * @param pfnCallback The callback.
1156 * @param pvUser The user argument.
1157 */
1158RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1159
1160/** @} */
1161
1162
1163/** @defgroup rt_str_utf16 UTF-16 String Manipulation
1164 * @ingroup grp_rt_str
1165 * @{
1166 */
1167
1168/**
1169 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
1170 * RTUtf16Dup() or RTUtf16DupEx().
1171 *
1172 * @returns Nothing; the function has no return value.
1173 * @param pwszString The UTF-16 string to free. NULL is accepted.
1174 */
1175RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
1176
1177/**
1178 * Allocates a new copy of the specified UTF-16 string.
1179 *
1180 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
1181 * @returns NULL when out of memory.
1182 * @param pwszString UTF-16 string to duplicate.
1183 * @remark This function will not make any attempt to validate the encoding.
1184 */
1185RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
1186
1187/**
1188 * Allocates a new copy of the specified UTF-16 string.
1189 *
1190 * @returns iprt status code.
1191 * @param ppwszString Receives pointer of the allocated UTF-16 string.
1192 * The returned pointer must be freed using RTUtf16Free().
1193 * @param pwszString UTF-16 string to duplicate.
1194 * @param cwcExtra Number of extra RTUTF16 items to allocate.
1195 * @remark This function will not make any attempt to validate the encoding.
1196 */
1197RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
1198
1199/**
1200 * Returns the length of a UTF-16 string in UTF-16 characters
1201 * without trailing '\\0'.
1202 *
1203 * Surrogate pairs counts as two UTF-16 characters here. Use RTUtf16CpCnt()
1204 * to get the exact number of code points in the string.
1205 *
1206 * @returns The number of RTUTF16 items in the string.
1207 * @param pwszString Pointer the UTF-16 string.
1208 * @remark This function will not make any attempt to validate the encoding.
1209 */
1210RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
1211
1212/**
1213 * Performs a case sensitive string compare between two UTF-16 strings.
1214 *
1215 * @returns < 0 if the first string less than the second string.
1216 * @returns 0 if the first string identical to the second string.
1217 * @returns > 0 if the first string greater than the second string.
1218 * @param pwsz1 First UTF-16 string.
1219 * @param pwsz2 Second UTF-16 string.
1220 * @remark This function will not make any attempt to validate the encoding.
1221 */
1222RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
1223
1224/**
1225 * Performs a case insensitive string compare between two UTF-16 strings.
1226 *
1227 * This is a simplified compare, as only the simplified lower/upper case folding
1228 * specified by the unicode specs are used. It does not consider character pairs
1229 * as they are used in some languages, just simple upper & lower case compares.
1230 *
1231 * @returns < 0 if the first string less than the second string.
1232 * @returns 0 if the first string identical to the second string.
1233 * @returns > 0 if the first string greater than the second string.
1234 * @param pwsz1 First UTF-16 string.
1235 * @param pwsz2 Second UTF-16 string.
1236 */
1237RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1238
1239/**
1240 * Performs a case insensitive string compare between two UTF-16 strings
1241 * using the current locale of the process (if applicable).
1242 *
1243 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
1244 * required data is available, to do a correct case-insensitive compare. It
1245 * follows that it is more complex and thereby likely to be more expensive.
1246 *
1247 * @returns < 0 if the first string less than the second string.
1248 * @returns 0 if the first string identical to the second string.
1249 * @returns > 0 if the first string greater than the second string.
1250 * @param pwsz1 First UTF-16 string.
1251 * @param pwsz2 Second UTF-16 string.
1252 */
1253RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1254
1255/**
1256 * Folds a UTF-16 string to lowercase.
1257 *
1258 * This is a very simple folding; it uses the simple lowercase
1259 * code point, it is not related to any locale just the most common
1260 * lowercase codepoint setup by the unicode specs, and it will not
1261 * create new surrogate pairs or remove existing ones.
1262 *
1263 * @returns Pointer to the passed in string.
1264 * @param pwsz The string to fold.
1265 */
1266RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1267
1268/**
1269 * Folds a UTF-16 string to uppercase.
1270 *
1271 * This is a very simple folding; it uses the simple uppercase
1272 * code point, it is not related to any locale just the most common
1273 * uppercase codepoint setup by the unicode specs, and it will not
1274 * create new surrogate pairs or remove existing ones.
1275 *
1276 * @returns Pointer to the passed in string.
1277 * @param pwsz The string to fold.
1278 */
1279RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1280
1281/**
1282 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
1283 *
1284 * @returns iprt status code.
1285 * @param pwszString UTF-16 string to convert.
1286 * @param ppszString Receives pointer of allocated UTF-8 string.
1287 * The returned pointer must be freed using RTStrFree().
1288 */
1289RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1290
1291/**
1292 * Translates UTF-16 to UTF-8 using buffer provided by the caller or
1293 * a fittingly sized buffer allocated by the function.
1294 *
1295 * @returns iprt status code.
1296 * @param pwszString The UTF-16 string to convert.
1297 * @param cwcString The number of RTUTF16 items to translate from pwszString.
1298 * The translation will stop when reaching cwcString or the terminator ('\\0').
1299 * Use RTSTR_MAX to translate the entire string.
1300 * @param ppsz If cch is non-zero, this must either be pointing to pointer to
1301 * a buffer of the specified size, or pointer to a NULL pointer.
1302 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
1303 * will be allocated to hold the translated string.
1304 * If a buffer was requested it must be freed using RTStrFree().
1305 * @param cch The buffer size in chars (the type). This includes the terminator.
1306 * @param pcch Where to store the length of the translated string. (Optional)
1307 * This field will be updated even on failure, however the value is only
1308 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
1309 * and VERR_NO_STR_MEMORY it contains the required buffer space.
1310 */
1311RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1312
1313/**
1314 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1315 *
1316 * This function will validate the string, and incorrectly encoded UTF-16
1317 * strings will be rejected. The primary purpose of this function is to
1318 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most
1319 * other purposes RTUtf16ToUtf8Ex() should be used.
1320 *
1321 * @returns Number of char (bytes).
1322 * @returns 0 if the string was incorrectly encoded.
1323 * @param pwsz The UTF-16 string.
1324 */
1325RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz);
1326
1327/**
1328 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1329 *
1330 * This function will validate the string, and incorrectly encoded UTF-16
1331 * strings will be rejected.
1332 *
1333 * @returns iprt status code.
1334 * @param pwsz The string.
1335 * @param cwc The max string length. Use RTSTR_MAX to process the entire string.
1336 * @param pcch Where to store the string length (in bytes). Optional.
1337 * This is undefined on failure.
1338 */
1339RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
1340
1341/**
1342 * Allocates tmp buffer, translates pwszString from UCS-2 to UTF8.
1343 *
1344 * @returns iprt status code.
1345 * @param ppszString Receives pointer of allocated UTF8 string.
1346 * The returned pointer must be freed using RTStrFree().
1347 * @param pwszString UCS-2 string to convert.
1348 * @deprecated Use RTUtf16ToUtf8().
1349 */
1350DECLINLINE(int) RTStrUcs2ToUtf8(char **ppszString, PCRTUCS2 pwszString)
1351{
1352 return RTUtf16ToUtf8(pwszString, ppszString);
1353}
1354
1355/**
1356 * Translates UCS-2 to UTF-8 using buffer provided by the caller or
1357 * a fittingly sized buffer allocated by the function.
1358 *
1359 * @returns iprt status code.
1360 * @param ppszString If cch is not zero, this points to the pointer to the
1361 * buffer where the converted string shall be resulted.
1362 * If cch is zero, this is where the pointer to the allocated
1363 * buffer with the converted string is stored. The allocated
1364 * buffer must be freed by using RTStrFree().
1365 * @param cch Size of the passed in buffer (*ppszString).
1366 * If 0 a fittingly sized buffer is allocated.
1367 * @param pwszString UCS-2 string to convert.
1368 * @deprecated
1369 */
1370DECLINLINE(int) RTStrUcs2ToUtf8Ex(char **ppszString, size_t cch, PCRTUCS2 pwszString)
1371{
1372 return RTUtf16ToUtf8Ex(pwszString, RTSTR_MAX, ppszString, cch, NULL);
1373}
1374
1375/**
1376 * Free a UCS-2 string allocated by RTStrUtf8ToUcs2().
1377 *
1378 * @returns iprt status code.
1379 * @param pwszString Pointer to buffer with unicode string to free.
1380 * NULL is accepted.
1381 * @deprecated
1382 */
1383DECLINLINE(void) RTStrUcs2Free(PRTUCS2 pwszString)
1384{
1385 RTUtf16Free(pwszString);
1386}
1387
1388/**
1389 * Allocates a new copy of the given UCS-2 string.
1390 *
1391 * @returns Pointer to the allocated string copy. Use RTStrUcs2Free() to free it.
1392 * @returns NULL when out of memory.
1393 * @param pwszString UCS-2 string to duplicate.
1394 * @deprecated
1395 */
1396DECLINLINE(PRTUCS2) RTStrUcs2Dup(PCRTUCS2 pwszString)
1397{
1398 return RTUtf16Dup(pwszString);
1399}
1400
1401/**
1402 * Allocates a new copy of the given UCS-2 string.
1403 *
1404 * @returns iprt status code.
1405 * @param ppwszString Receives pointer of the allocated UCS-2 string.
1406 * The returned pointer must be freed using RTStrUcs2Free().
1407 * @param pwszString UCS-2 string to duplicate.
1408 * @deprecated
1409 */
1410DECLINLINE(int) RTStrUcs2DupEx(PRTUCS2 *ppwszString, PCRTUCS2 pwszString)
1411{
1412 return RTUtf16DupEx(ppwszString, pwszString, 0);
1413}
1414
1415/**
1416 * Returns the length of a UCS-2 string in UCS-2 characters
1417 * without trailing '\\0'.
1418 *
1419 * @returns Length of input string in UCS-2 characters.
1420 * @param pwszString Pointer the UCS-2 string.
1421 * @deprecated
1422 */
1423DECLINLINE(size_t) RTStrUcs2Len(PCRTUCS2 pwszString)
1424{
1425 return RTUtf16Len(pwszString);
1426}
1427
1428/**
1429 * Performs a case sensitive string compare between two UCS-2 strings.
1430 *
1431 * @returns < 0 if the first string less than the second string.
1432 * @returns 0 if the first string identical to the second string.
1433 * @returns > 0 if the first string greater than the second string.
1434 * @param pwsz1 First UCS-2 string.
1435 * @param pwsz2 Second UCS-2 string.
1436 * @deprecated
1437 */
1438DECLINLINE(int) RTStrUcs2Cmp(register PCRTUCS2 pwsz1, register PCRTUCS2 pwsz2)
1439{
1440 return RTUtf16Cmp(pwsz1, pwsz2);
1441}
1442
1443
1444/**
1445 * Get the unicode code point at the given string position.
1446 *
1447 * @returns unicode code point.
1448 * @returns RTUNICP_INVALID if the encoding is invalid.
1449 * @param pwsz The string.
1450 *
1451 * @remark This is an internal worker for RTUtf16GetCp().
1452 */
1453RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1454
1455/**
1456 * Get the unicode code point at the given string position.
1457 *
1458 * @returns iprt status code.
1459 * @param ppwsz Pointer to the string pointer. This will be updated to
1460 * point to the char following the current code point.
1461 * @param pCp Where to store the code point.
1462 * RTUNICP_INVALID is stored here on failure.
1463 *
1464 * @remark This is an internal worker for RTUtf16GetCpEx().
1465 */
1466RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1467
1468/**
1469 * Put the unicode code point at the given string position
1470 * and return the pointer to the char following it.
1471 *
1472 * This function will not consider anything at or following the
1473 * buffer area pointed to by pwsz. It is therefore not suitable for
1474 * inserting code points into a string, only appending/overwriting.
1475 *
1476 * @returns pointer to the char following the written code point.
1477 * @param pwsz The string.
1478 * @param CodePoint The code point to write.
1479 * This should not be RTUNICP_INVALID or any other character
1480 * out of the UTF-16 range.
1481 *
1482 * @remark This is an internal worker for RTUtf16PutCp().
1483 */
1484RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1485
1486/**
1487 * Get the unicode code point at the given string position.
1488 *
1489 * @returns unicode code point.
1490 * @returns RTUNICP_INVALID if the encoding is invalid.
1491 * @param pwsz The string.
1492 *
1493 * @remark We optimize this operation by using an inline function for
1494 * everything which isn't a surrogate pair or an endian indicator.
1495 */
1496DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1497{
1498 const RTUTF16 wc = *pwsz;
1499 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1500 return wc;
1501 return RTUtf16GetCpInternal(pwsz);
1502}
1503
1504/**
1505 * Get the unicode code point at the given string position.
1506 *
1507 * @returns iprt status code.
1508 * @param ppwsz Pointer to the string pointer. This will be updated to
1509 * point to the char following the current code point.
1510 * @param pCp Where to store the code point.
1511 * RTUNICP_INVALID is stored here on failure.
1512 *
1513 * @remark We optimize this operation by using an inline function for
1514 * everything which isn't a surrogate pair or and endian indicator.
1515 */
1516DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1517{
1518 const RTUTF16 wc = **ppwsz;
1519 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1520 {
1521 (*ppwsz)++;
1522 *pCp = wc;
1523 return VINF_SUCCESS;
1524 }
1525 return RTUtf16GetCpExInternal(ppwsz, pCp);
1526}
1527
1528/**
1529 * Put the unicode code point at the given string position
1530 * and return the pointer to the char following it.
1531 *
1532 * This function will not consider anything at or following the the
1533 * buffer area pointed to by pwsz. It is therefore not suitable for
1534 * inserting code points into a string, only appending/overwriting.
1535 *
1536 * @returns pointer to the char following the written code point.
1537 * @param pwsz The string.
1538 * @param CodePoint The code point to write.
1539 * This sould not be RTUNICP_INVALID or any other charater
1540 * out of the UTF-16 range.
1541 *
1542 * @remark We optimize this operation by using an inline function for
1543 * everything which isn't a surrogate pair or and endian indicator.
1544 */
1545DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1546{
1547 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
1548 {
1549 *pwsz++ = (RTUTF16)CodePoint;
1550 return pwsz;
1551 }
1552 return RTUtf16PutCpInternal(pwsz, CodePoint);
1553}
1554
1555/**
1556 * Skips ahead, past the current code point.
1557 *
1558 * @returns Pointer to the char after the current code point.
1559 * @param pwsz Pointer to the current code point.
1560 * @remark This will not move the next valid code point, only past the current one.
1561 */
1562DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1563{
1564 RTUNICP Cp;
1565 RTUtf16GetCpEx(&pwsz, &Cp);
1566 return (PRTUTF16)pwsz;
1567}
1568
1569/**
1570 * Skips backwards, to the previous code point.
1571 *
1572 * @returns Pointer to the previous code point.
1573 * @param pwszStart Pointer to the start of the string.
1574 * @param pwsz Pointer to the current code point.
1575 */
1576RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1577
1578
1579/**
1580 * Checks if the UTF-16 char is the high surrogate char (i.e.
1581 * the 1st char in the pair).
1582 *
1583 * @returns true if it is.
1584 * @returns false if it isn't.
1585 * @param wc The character to investigate.
1586 */
1587DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1588{
1589 return wc >= 0xd800 && wc <= 0xdbff;
1590}
1591
1592/**
1593 * Checks if the UTF-16 char is the low surrogate char (i.e.
1594 * the 2nd char in the pair).
1595 *
1596 * @returns true if it is.
1597 * @returns false if it isn't.
1598 * @param wc The character to investigate.
1599 */
1600DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1601{
1602 return wc >= 0xdc00 && wc <= 0xdfff;
1603}
1604
1605
1606/**
1607 * Checks if the two UTF-16 chars form a valid surrogate pair.
1608 *
1609 * @returns true if they do.
1610 * @returns false if they doesn't.
1611 * @param wcHigh The high (1st) character.
1612 * @param wcLow The low (2nd) character.
1613 */
1614DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1615{
1616 return RTUtf16IsHighSurrogate(wcHigh)
1617 && RTUtf16IsLowSurrogate(wcLow);
1618}
1619
1620/** @} */
1621
1622__END_DECLS
1623
1624/** @} */
1625
1626#endif
1627
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette