VirtualBox

source: vbox/trunk/include/iprt/string.h@ 31808

Last change on this file since 31808 was 31419, checked in by vboxsync, 14 years ago

iprt/string.h: Fixed RTSTR_GET_BIT_FLAG. Affects RTStrFormatV and all its users.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 131.2 KB
Line 
1/** @file
2 * IPRT - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2010 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_string_h
27#define ___iprt_string_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32#include <iprt/stdarg.h>
33#include <iprt/err.h> /* for VINF_SUCCESS */
34#if defined(RT_OS_LINUX) && defined(__KERNEL__)
35# include <linux/string.h>
36#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
37# include <sys/libkern.h>
38 /*
39 * No memmove on versions < 7.2
40 * Defining a macro using bcopy here
41 */
42# define memmove(dst, src, size) bcopy(src, dst, size)
43#elif defined(RT_OS_SOLARIS) && defined(_KERNEL)
44 /*
45 * Same case as with FreeBSD kernel:
46 * The string.h stuff clashes with sys/system.h
47 * ffs = find first set bit.
48 */
49# define ffs ffs_string_h
50# include <string.h>
51# undef ffs
52# undef strpbrk
53#else
54# include <string.h>
55#endif
56
57/*
58 * Supply prototypes for standard string functions provided by
59 * IPRT instead of the operating environment.
60 */
61#if (defined(RT_OS_DARWIN) && defined(KERNEL)) \
62 || (defined(RT_OS_FREEBSD) && defined(_KERNEL))
63RT_C_DECLS_BEGIN
64void *memchr(const void *pv, int ch, size_t cb);
65char *strpbrk(const char *pszStr, const char *pszChars);
66RT_C_DECLS_END
67#endif
68
69
70/** @def RT_USE_RTC_3629
71 * When defined the UTF-8 range will stop at 0x10ffff. If not defined, the
72 * range stops at 0x7fffffff.
73 * @remarks Must be defined both when building and using the IPRT. */
74#ifdef DOXYGEN_RUNNING
75# define RT_USE_RTC_3629
76#endif
77
78
79/**
80 * Byte zero the specified object.
81 *
82 * This will use sizeof(Obj) to figure the size and will call memset, bzero
83 * or some compiler intrinsic to perform the actual zeroing.
84 *
85 * @param Obj The object to zero. Make sure to dereference pointers.
86 *
87 * @remarks Because the macro may use memset it has been placed in string.h
88 * instead of cdefs.h to avoid build issues because someone forgot
89 * to include this header.
90 *
91 * @ingroup grp_rt_cdefs
92 */
93#define RT_ZERO(Obj) RT_BZERO(&(Obj), sizeof(Obj))
94
95/**
96 * Byte zero the specified memory area.
97 *
98 * This will call memset, bzero or some compiler intrinsic to clear the
99 * specified bytes of memory.
100 *
101 * @param pv Pointer to the memory.
102 * @param cb The number of bytes to clear. Please, don't pass 0.
103 *
104 * @remarks Because the macro may use memset it has been placed in string.h
105 * instead of cdefs.h to avoid build issues because someone forgot
106 * to include this header.
107 *
108 * @ingroup grp_rt_cdefs
109 */
110#define RT_BZERO(pv, cb) do { memset((pv), 0, cb); } while (0)
111
112
113
114/** @defgroup grp_rt_str RTStr - String Manipulation
115 * Mostly UTF-8 related helpers where the standard string functions won't do.
116 * @ingroup grp_rt
117 * @{
118 */
119
120RT_C_DECLS_BEGIN
121
122
123/**
124 * The maximum string length.
125 */
126#define RTSTR_MAX (~(size_t)0)
127
128
129/** @def RTSTR_TAG
130 * The default allocation tag used by the RTStr allocation APIs.
131 *
132 * When not defined before the inclusion of iprt/string.h, this will default to
133 * the pointer to the current file name. The string API will make use of
134 * this as pointer to a volatile but read-only string.
135 */
136#ifndef RTSTR_TAG
137# define RTSTR_TAG (__FILE__)
138#endif
139
140
141#ifdef IN_RING3
142
143/**
144 * Allocates tmp buffer with default tag, translates pszString from UTF8 to
145 * current codepage.
146 *
147 * @returns iprt status code.
148 * @param ppszString Receives pointer of allocated native CP string.
149 * The returned pointer must be freed using RTStrFree().
150 * @param pszString UTF-8 string to convert.
151 */
152#define RTStrUtf8ToCurrentCP(ppszString, pszString) RTStrUtf8ToCurrentCPTag((ppszString), (pszString), RTSTR_TAG)
153
154/**
155 * Allocates tmp buffer with custom tag, translates pszString from UTF8 to
156 * current codepage.
157 *
158 * @returns iprt status code.
159 * @param ppszString Receives pointer of allocated native CP string.
160 * The returned pointer must be freed using
161 * RTStrFree().
162 * @param pszString UTF-8 string to convert.
163 * @param pszTag Allocation tag used for statistics and such.
164 */
165RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag);
166
167/**
168 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
169 *
170 * @returns iprt status code.
171 * @param ppszString Receives pointer of allocated UTF-8 string.
172 * The returned pointer must be freed using RTStrFree().
173 * @param pszString Native string to convert.
174 */
175#define RTStrCurrentCPToUtf8(ppszString, pszString) RTStrCurrentCPToUtf8Tag((ppszString), (pszString), RTSTR_TAG)
176
177/**
178 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
179 *
180 * @returns iprt status code.
181 * @param ppszString Receives pointer of allocated UTF-8 string.
182 * The returned pointer must be freed using RTStrFree().
183 * @param pszString Native string to convert.
184 * @param pszTag Allocation tag used for statistics and such.
185 */
186RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag);
187
188#endif /* IN_RING3 */
189
190/**
191 * Free string allocated by any of the non-UCS-2 string functions.
192 *
193 * @returns Nothing (the function is void).
194 * @param pszString Pointer to buffer with string to free.
195 * NULL is accepted.
196 */
197RTDECL(void) RTStrFree(char *pszString);
198
199/**
200 * Allocates a new copy of the given UTF-8 string (default tag).
201 *
202 * @returns Pointer to the allocated UTF-8 string.
203 * @param pszString UTF-8 string to duplicate.
204 */
205#define RTStrDup(pszString) RTStrDupTag((pszString), RTSTR_TAG)
206
207/**
208 * Allocates a new copy of the given UTF-8 string (custom tag).
209 *
210 * @returns Pointer to the allocated UTF-8 string.
211 * @param pszString UTF-8 string to duplicate.
212 * @param pszTag Allocation tag used for statistics and such.
213 */
214RTDECL(char *) RTStrDupTag(const char *pszString, const char *pszTag);
215
216/**
217 * Allocates a new copy of the given UTF-8 string (default tag).
218 *
219 * @returns iprt status code.
220 * @param ppszString Receives pointer of the allocated UTF-8 string.
221 * The returned pointer must be freed using RTStrFree().
222 * @param pszString UTF-8 string to duplicate.
223 */
224#define RTStrDupEx(ppszString, pszString) RTStrDupExTag((ppszString), (pszString), RTSTR_TAG)
225
226/**
227 * Allocates a new copy of the given UTF-8 string (custom tag).
228 *
229 * @returns iprt status code.
230 * @param ppszString Receives pointer of the allocated UTF-8 string.
231 * The returned pointer must be freed using RTStrFree().
232 * @param pszString UTF-8 string to duplicate.
233 * @param pszTag Allocation tag used for statistics and such.
234 */
235RTDECL(int) RTStrDupExTag(char **ppszString, const char *pszString, const char *pszTag);
236
237/**
238 * Allocates a new copy of the given UTF-8 substring (default tag).
239 *
240 * @returns Pointer to the allocated UTF-8 substring.
241 * @param pszString UTF-8 string to duplicate.
242 * @param cchMax The max number of chars to duplicate, not counting
243 * the terminator.
244 */
245#define RTStrDupN(pszString, cchMax) RTStrDupNTag((pszString), (cchMax), RTSTR_TAG)
246
247/**
248 * Allocates a new copy of the given UTF-8 substring (custom tag).
249 *
250 * @returns Pointer to the allocated UTF-8 substring.
251 * @param pszString UTF-8 string to duplicate.
252 * @param cchMax The max number of chars to duplicate, not counting
253 * the terminator.
254 * @param pszTag Allocation tag used for statistics and such.
255 */
256RTDECL(char *) RTStrDupNTag(const char *pszString, size_t cchMax, const char *pszTag);
257
258/**
259 * Appends a string onto an existing IPRT allocated string (default tag).
260 *
261 * @retval VINF_SUCCESS
262 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
263 * remains unchanged.
264 *
265 * @param ppsz Pointer to the string pointer. The string
266 * pointer must either be NULL or point to a string
267 * returned by an IPRT string API. (In/Out)
268 * @param pszAppend The string to append. NULL and empty strings
269 * are quietly ignored.
270 */
271#define RTStrAAppend(ppsz, pszAppend) RTStrAAppendTag((ppsz), (pszAppend), RTSTR_TAG)
272
273/**
274 * Appends a string onto an existing IPRT allocated string (custom tag).
275 *
276 * @retval VINF_SUCCESS
277 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
278 * remains unchanged.
279 *
280 * @param ppsz Pointer to the string pointer. The string
281 * pointer must either be NULL or point to a string
282 * returned by an IPRT string API. (In/Out)
283 * @param pszAppend The string to append. NULL and empty strings
284 * are quietly ignored.
285 * @param pszTag Allocation tag used for statistics and such.
286 */
287RTDECL(int) RTStrAAppendTag(char **ppsz, const char *pszAppend, const char *pszTag);
288
289/**
290 * Appends N bytes from a string onto an existing IPRT allocated string
291 * (default tag).
292 *
293 * @retval VINF_SUCCESS
294 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
295 * remains unchanged.
296 *
297 * @param ppsz Pointer to the string pointer. The string
298 * pointer must either be NULL or point to a string
299 * returned by an IPRT string API. (In/Out)
300 * @param pszAppend The string to append. Can be NULL if cchAppend
301 * is 0.
302 * @param cchAppend The number of chars (not code points) to append
303 * from pszAppend. Must not be more than
304 * @a pszAppend contains, except for the special
305 * value RTSTR_MAX that can be used to indicate all
306 * of @a pszAppend without having to strlen it.
307 */
308#define RTStrAAppendN(ppsz, pszAppend, cchAppend) RTStrAAppendNTag((ppsz), (pszAppend), (cchAppend), RTSTR_TAG)
309
310/**
311 * Appends N bytes from a string onto an existing IPRT allocated string (custom
312 * tag).
313 *
314 * @retval VINF_SUCCESS
315 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
316 * remains unchanged.
317 *
318 * @param ppsz Pointer to the string pointer. The string
319 * pointer must either be NULL or point to a string
320 * returned by an IPRT string API. (In/Out)
321 * @param pszAppend The string to append. Can be NULL if cchAppend
322 * is 0.
323 * @param cchAppend The number of chars (not code points) to append
324 * from pszAppend. Must not be more than
325 * @a pszAppend contains, except for the special
326 * value RTSTR_MAX that can be used to indicate all
327 * of @a pszAppend without having to strlen it.
328 * @param pszTag Allocation tag used for statistics and such.
329 */
330RTDECL(int) RTStrAAppendNTag(char **ppsz, const char *pszAppend, size_t cchAppend, const char *pszTag);
331
332/**
333 * Appends one or more strings onto an existing IPRT allocated string.
334 *
335 * This is a very flexible and efficient alternative to using RTStrAPrintf to
336 * combine several strings together.
337 *
338 * @retval VINF_SUCCESS
339 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
340 * remains unchanged.
341 *
342 * @param ppsz Pointer to the string pointer. The string
343 * pointer must either be NULL or point to a string
344 * returned by an IPRT string API. (In/Out)
345 * @param cPairs The number of string / length pairs in the
346 * @a va.
347 * @param va List of string (const char *) and length
348 * (size_t) pairs. The strings will be appended to
349 * the string in the first argument.
350 */
351#define RTStrAAppendExNV(ppsz, cPairs, va) RTStrAAppendExNVTag((ppsz), (cPairs), (va), RTSTR_TAG)
352
353/**
354 * Appends one or more strings onto an existing IPRT allocated string.
355 *
356 * This is a very flexible and efficient alternative to using RTStrAPrintf to
357 * combine several strings together.
358 *
359 * @retval VINF_SUCCESS
360 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
361 * remains unchanged.
362 *
363 * @param ppsz Pointer to the string pointer. The string
364 * pointer must either be NULL or point to a string
365 * returned by an IPRT string API. (In/Out)
366 * @param cPairs The number of string / length pairs in the
367 * @a va.
368 * @param va List of string (const char *) and length
369 * (size_t) pairs. The strings will be appended to
370 * the string in the first argument.
371 * @param pszTag Allocation tag used for statistics and such.
372 */
373RTDECL(int) RTStrAAppendExNVTag(char **ppsz, size_t cPairs, va_list va, const char *pszTag);
374
375/**
376 * Appends one or more strings onto an existing IPRT allocated string
377 * (default tag).
378 *
379 * This is a very flexible and efficient alternative to using RTStrAPrintf to
380 * combine several strings together.
381 *
382 * @retval VINF_SUCCESS
383 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
384 * remains unchanged.
385 *
386 * @param ppsz Pointer to the string pointer. The string
387 * pointer must either be NULL or point to a string
388 * returned by an IPRT string API. (In/Out)
389 * @param cPairs The number of string / length pairs in the
390 * ellipsis.
391 * @param ... List of string (const char *) and length
392 * (size_t) pairs. The strings will be appended to
393 * the string in the first argument.
394 */
395DECLINLINE(int) RTStrAAppendExN(char **ppsz, size_t cPairs, ...)
396{
397 int rc;
398 va_list va;
399 va_start(va, cPairs); /* cPairs is the last named parameter before the ellipsis. */
400 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, RTSTR_TAG); /* Forward to the va_list variant, passing RTSTR_TAG as the tag. */
401 va_end(va); /* Pairs with the va_start above; rc is returned unchanged. */
402 return rc;
403}
404
405/**
406 * Appends one or more strings onto an existing IPRT allocated string (custom
407 * tag).
408 *
409 * This is a very flexible and efficient alternative to using RTStrAPrintf to
410 * combine several strings together.
411 *
412 * @retval VINF_SUCCESS
413 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
414 * remains unchanged.
415 *
416 * @param ppsz Pointer to the string pointer. The string
417 * pointer must either be NULL or point to a string
418 * returned by an IPRT string API. (In/Out)
419 * @param pszTag Allocation tag used for statistics and such.
420 * @param cPairs The number of string / length pairs in the
421 * ellipsis.
422 * @param ... List of string (const char *) and length
423 * (size_t) pairs. The strings will be appended to
424 * the string in the first argument.
425 */
426DECLINLINE(int) RTStrAAppendExNTag(char **ppsz, const char *pszTag, size_t cPairs, ...)
427{
428 int rc;
429 va_list va;
430 va_start(va, cPairs); /* cPairs (not pszTag) is the last named parameter before the ellipsis. */
431 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, pszTag); /* Note the order: pszTag is second here but last in the va_list variant. */
432 va_end(va); /* Pairs with the va_start above; rc is returned unchanged. */
433 return rc;
434}
435
436/**
437 * Truncates an IPRT allocated string (default tag).
438 *
439 * @retval VINF_SUCCESS.
440 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done.
441 *
442 * @param ppsz Pointer to the string pointer. The string
443 * pointer can be NULL if @a cchNew is 0, no change
444 * is made then. If we actually reallocate the
445 * string, the string pointer might be changed by
446 * this call. (In/Out)
447 * @param cchNew The new string length (excluding the
448 * terminator). The string must be at least this
449 * long or we'll return VERR_OUT_OF_RANGE and
450 * assert on you.
451 */
452#define RTStrATruncate(ppsz, cchNew) RTStrATruncateTag((ppsz), (cchNew), RTSTR_TAG)
453
454/**
455 * Truncates an IPRT allocated string.
456 *
457 * @retval VINF_SUCCESS.
458 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done.
459 *
460 * @param ppsz Pointer to the string pointer. The string
461 * pointer can be NULL if @a cchNew is 0, no change
462 * is made then. If we actually reallocate the
463 * string, the string pointer might be changed by
464 * this call. (In/Out)
465 * @param cchNew The new string length (excluding the
466 * terminator). The string must be at least this
467 * long or we'll return VERR_OUT_OF_RANGE and
468 * assert on you.
469 * @param pszTag Allocation tag used for statistics and such.
470 */
471RTDECL(int) RTStrATruncateTag(char **ppsz, size_t cchNew, const char *pszTag);
472
473/**
474 * Allocates memory for string storage (default tag).
475 *
476 * You should normally not use this function, except if there is some very
477 * custom string handling you need doing that isn't covered by any of the other
478 * APIs.
479 *
480 * @returns Pointer to the allocated string. The first byte is always set
481 * to the string terminator char, the contents of the remainder of the
482 * memory is undefined. The string must be freed by calling RTStrFree.
483 *
484 * NULL is returned if the allocation failed. Please translate this to
485 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider
486 * RTStrAllocEx if an IPRT status code is required.
487 *
488 * @param cb How many bytes to allocate. If this is zero, we
489 * will allocate a terminator byte anyway.
490 */
491#define RTStrAlloc(cb) RTStrAllocTag((cb), RTSTR_TAG)
492
493/**
494 * Allocates memory for string storage (custom tag).
495 *
496 * You should normally not use this function, except if there is some very
497 * custom string handling you need doing that isn't covered by any of the other
498 * APIs.
499 *
500 * @returns Pointer to the allocated string. The first byte is always set
501 * to the string terminator char, the contents of the remainder of the
502 * memory is undefined. The string must be freed by calling RTStrFree.
503 *
504 * NULL is returned if the allocation failed. Please translate this to
505 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider
506 * RTStrAllocEx if an IPRT status code is required.
507 *
508 * @param cb How many bytes to allocate. If this is zero, we
509 * will allocate a terminator byte anyway.
510 * @param pszTag Allocation tag used for statistics and such.
511 */
512RTDECL(char *) RTStrAllocTag(size_t cb, const char *pszTag);
513
514/**
515 * Allocates memory for string storage, with status code (default tag).
516 *
517 * You should normally not use this function, except if there is some very
518 * custom string handling you need doing that isn't covered by any of the other
519 * APIs.
520 *
521 * @retval VINF_SUCCESS
522 * @retval VERR_NO_STR_MEMORY
523 *
524 * @param ppsz Where to return the allocated string. This will
525 * be set to NULL on failure. On success, the
526 * returned memory will always start with a
527 * terminator char so that it is considered a valid
528 * C string, the contents of rest of the memory is
529 * undefined.
530 * @param cb How many bytes to allocate. If this is zero, we
531 * will allocate a terminator byte anyway.
532 */
533#define RTStrAllocEx(ppsz, cb) RTStrAllocExTag((ppsz), (cb), RTSTR_TAG)
534
535/**
536 * Allocates memory for string storage, with status code (custom tag).
537 *
538 * You should normally not use this function, except if there is some very
539 * custom string handling you need doing that isn't covered by any of the other
540 * APIs.
541 *
542 * @retval VINF_SUCCESS
543 * @retval VERR_NO_STR_MEMORY
544 *
545 * @param ppsz Where to return the allocated string. This will
546 * be set to NULL on failure. On success, the
547 * returned memory will always start with a
548 * terminator char so that it is considered a valid
549 * C string, the contents of rest of the memory is
550 * undefined.
551 * @param cb How many bytes to allocate. If this is zero, we
552 * will allocate a terminator byte anyway.
553 * @param pszTag Allocation tag used for statistics and such.
554 */
555RTDECL(int) RTStrAllocExTag(char **ppsz, size_t cb, const char *pszTag);
556
557/**
558 * Reallocates the specified string (default tag).
559 *
560 * You should normally not have to use this function, except perhaps to truncate a
561 * really long string you've got from some IPRT string API, but then you should
562 * use RTStrATruncate.
563 *
564 * @returns VINF_SUCCESS.
565 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
566 * remains unchanged.
567 *
568 * @param ppsz Pointer to the string variable containing the
569 * input and output string.
570 *
571 * When not freeing the string, the result will
572 * always have the last byte set to the terminator
573 * character so that when used for string
574 * truncation the result will be a valid C string
575 * (your job to keep it a valid UTF-8 string).
576 *
577 * When the input string is NULL and we're supposed
578 * to reallocate, the returned string will also
579 * have the first byte set to the terminator char
580 * so it will be a valid C string.
581 *
582 * @param cbNew When @a cbNew is zero, we'll behave like
583 * RTStrFree and @a *ppsz will be set to NULL.
584 *
585 * When not zero, this will be the new size of the
586 * memory backing the string, i.e. it includes the
587 * terminator char.
588 */
589#define RTStrRealloc(ppsz, cbNew) RTStrReallocTag((ppsz), (cbNew), RTSTR_TAG)
590
591/**
592 * Reallocates the specified string (custom tag).
593 *
594 * You should normally not have to use this function, except perhaps to truncate a
595 * really long string you've got from some IPRT string API, but then you should
596 * use RTStrATruncate.
597 *
598 * @returns VINF_SUCCESS.
599 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz
600 * remains unchanged.
601 *
602 * @param ppsz Pointer to the string variable containing the
603 * input and output string.
604 *
605 * When not freeing the string, the result will
606 * always have the last byte set to the terminator
607 * character so that when used for string
608 * truncation the result will be a valid C string
609 * (your job to keep it a valid UTF-8 string).
610 *
611 * When the input string is NULL and we're supposed
612 * to reallocate, the returned string will also
613 * have the first byte set to the terminator char
614 * so it will be a valid C string.
615 *
616 * @param cbNew When @a cbNew is zero, we'll behave like
617 * RTStrFree and @a *ppsz will be set to NULL.
618 *
619 * When not zero, this will be the new size of the
620 * memory backing the string, i.e. it includes the
621 * terminator char.
622 * @param pszTag Allocation tag used for statistics and such.
623 */
624RTDECL(int) RTStrReallocTag(char **ppsz, size_t cbNew, const char *pszTag);
625
626/**
627 * Validates the UTF-8 encoding of the string.
628 *
629 * @returns iprt status code.
630 * @param psz The string.
631 */
632RTDECL(int) RTStrValidateEncoding(const char *psz);
633
634/** @name Flags for RTStrValidateEncodingEx
635 * @{ */
636/** Check that the string is zero terminated within the given size.
637 * VERR_BUFFER_OVERFLOW will be returned if the check fails. */
638#define RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED RT_BIT_32(0)
639/** @} */
640
641/**
642 * Validates the UTF-8 encoding of the string.
643 *
644 * @returns iprt status code.
645 * @param psz The string.
646 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
647 * @param fFlags Combination of RTSTR_VALIDATE_ENCODING_XXX flags, or 0.
648 */
649RTDECL(int) RTStrValidateEncodingEx(const char *psz, size_t cch, uint32_t fFlags);
650
651/**
652 * Checks if the UTF-8 encoding is valid.
653 *
654 * @returns true / false.
655 * @param psz The string.
656 */
657RTDECL(bool) RTStrIsValidEncoding(const char *psz);
658
659/**
660 * Purge all bad UTF-8 encoding in the string, replacing it with '?'.
661 *
662 * @returns The number of bad characters (0 if nothing was done).
663 * @param psz The string to purge.
664 */
665RTDECL(size_t) RTStrPurgeEncoding(char *psz);
666
667/**
668 * Gets the number of code points the string is made up of, excluding
669 * the terminator.
670 *
671 *
672 * @returns Number of code points (RTUNICP).
673 * @returns 0 if the string was incorrectly encoded.
674 * @param psz The string.
675 */
676RTDECL(size_t) RTStrUniLen(const char *psz);
677
678/**
679 * Gets the number of code points the string is made up of, excluding
680 * the terminator.
681 *
682 * This function will validate the string, and incorrectly encoded UTF-8
683 * strings will be rejected.
684 *
685 * @returns iprt status code.
686 * @param psz The string.
687 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
688 * @param pcuc Where to store the code point count.
689 * This is undefined on failure.
690 */
691RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
692
693/**
694 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
695 *
696 * @returns iprt status code.
697 * @param pszString UTF-8 string to convert.
698 * @param ppUniString Receives pointer to the allocated unicode string.
699 * The returned string must be freed using RTUniFree().
700 */
701RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
702
703/**
704 * Translates pszString from UTF-8 to an array of code points, allocating the result
705 * array if requested.
706 *
707 * @returns iprt status code.
708 * @param pszString UTF-8 string to convert.
709 * @param cchString The maximum size in chars (the type) to convert. The conversion stops
710 * when it reaches cchString or the string terminator ('\\0').
711 * Use RTSTR_MAX to translate the entire string.
712 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
713 * a buffer of the specified size, or pointer to a NULL pointer.
714 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
715 * will be allocated to hold the translated string.
716 * If a buffer was requested it must be freed using RTUniFree().
717 * @param cCps The number of code points in the unicode string. This includes the terminator.
718 * @param pcCps Where to store the length of the translated string,
719 * excluding the terminator. (Optional)
720 *
721 * This may be set under some error conditions,
722 * however, only for VERR_BUFFER_OVERFLOW and
723 * VERR_NO_STR_MEMORY will it contain a valid string
724 * length that can be used to resize the buffer.
725 */
726RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
727
728/**
729 * Calculates the length of the string in RTUTF16 items.
730 *
731 * This function will validate the string, and incorrectly encoded UTF-8
732 * strings will be rejected. The primary purpose of this function is to
733 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
734 * other purposes RTStrCalcUtf16LenEx() should be used.
735 *
736 * @returns Number of RTUTF16 items.
737 * @returns 0 if the string was incorrectly encoded.
738 * @param psz The string.
739 */
740RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
741
742/**
743 * Calculates the length of the string in RTUTF16 items.
744 *
745 * This function will validate the string, and incorrectly encoded UTF-8
746 * strings will be rejected.
747 *
748 * @returns iprt status code.
749 * @param psz The string.
750 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
751 * @param pcwc Where to store the string length. Optional.
752 * This is undefined on failure.
753 */
754RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
755
756/**
757 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (default
758 * tag).
759 *
760 * @returns iprt status code.
761 * @param pszString UTF-8 string to convert.
762 * @param ppwszString Receives pointer to the allocated UTF-16 string.
763 * The returned string must be freed using RTUtf16Free().
764 */
765#define RTStrToUtf16(pszString, ppwszString) RTStrToUtf16Tag((pszString), (ppwszString), RTSTR_TAG)
766
767/**
768 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (custom
769 * tag).
770 *
771 * @returns iprt status code.
772 * @param pszString UTF-8 string to convert.
773 * @param ppwszString Receives pointer to the allocated UTF-16 string.
774 * The returned string must be freed using RTUtf16Free().
775 * @param pszTag Allocation tag used for statistics and such.
776 */
777RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag);
778
779/**
780 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
781 *
782 * @returns iprt status code.
783 * @param pszString UTF-8 string to convert.
784 * @param cchString The maximum size in chars (the type) to convert. The conversion stops
785 * when it reaches cchString or the string terminator ('\\0').
786 * Use RTSTR_MAX to translate the entire string.
787 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
788 * a buffer of the specified size, or pointer to a NULL pointer.
789 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
790 * will be allocated to hold the translated string.
791 * If a buffer was requested it must be freed using RTUtf16Free().
792 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
793 * @param pcwc Where to store the length of the translated string,
794 * excluding the terminator. (Optional)
795 *
796 * This may be set under some error conditions,
797 * however, only for VERR_BUFFER_OVERFLOW and
798 * VERR_NO_STR_MEMORY will it contain a valid string
799 * length that can be used to resize the buffer.
800 */
801#define RTStrToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \
802 RTStrToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG)
803
804/**
805 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if
806 * requested (custom tag).
807 *
808 * @returns iprt status code.
809 * @param pszString UTF-8 string to convert.
810 * @param cchString The maximum size in chars (the type) to convert. The conversion stops
811 * when it reaches cchString or the string terminator ('\\0').
812 * Use RTSTR_MAX to translate the entire string.
813 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
814 * a buffer of the specified size, or pointer to a NULL pointer.
815 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
816 * will be allocated to hold the translated string.
817 * If a buffer was requested it must be freed using RTUtf16Free().
818 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
819 * @param pcwc Where to store the length of the translated string,
820 * excluding the terminator. (Optional)
821 *
822 * This may be set under some error conditions,
823 * however, only for VERR_BUFFER_OVERFLOW and
824 * VERR_NO_STR_MEMORY will it contain a valid string
825 * length that can be used to resize the buffer.
826 * @param pszTag Allocation tag used for statistics and such.
827 */
828RTDECL(int) RTStrToUtf16ExTag(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
829
830
831/**
832 * Calculates the length of the string in Latin-1 characters.
833 *
834 * This function will validate the string, and incorrectly encoded UTF-8
835 * strings as well as string with codepoints outside the latin-1 range will be
836 * rejected. The primary purpose of this function is to help allocate buffers
837 * for RTStrToLatin1Ex of the correct size. For most other purposes
838 * RTStrCalcLatin1LenEx() should be used.
839 *
840 * @returns Number of Latin-1 characters.
841 * @returns 0 if the string was incorrectly encoded.
842 * @param psz The string.
843 */
844RTDECL(size_t) RTStrCalcLatin1Len(const char *psz);
845
846/**
847 * Calculates the length of the string in Latin-1 characters.
848 *
849 * This function will validate the string, and incorrectly encoded UTF-8
850 * strings as well as string with codepoints outside the latin-1 range will be
851 * rejected.
852 *
853 * @returns iprt status code.
854 * @param psz The string.
855 * @param cch The max string length. Use RTSTR_MAX to process the
856 * entire string.
857 * @param pcch Where to store the string length. Optional.
858 * This is undefined on failure.
859 */
860RTDECL(int) RTStrCalcLatin1LenEx(const char *psz, size_t cch, size_t *pcwc);
861
/**
 * Converts a UTF-8 string to Latin-1, allocating the buffer for the result
 * (default tag).
 *
 * Forwards to RTStrToLatin1Tag() with RTSTR_TAG as the allocation tag.
 *
 * @returns iprt status code.
 * @param   pszString   The UTF-8 string to convert.
 * @param   ppszString  Receives the pointer to the allocated Latin-1 string.
 *                      The returned string must be freed using RTStrFree().
 */
#define RTStrToLatin1(pszString, ppszString) \
    RTStrToLatin1Tag((pszString), (ppszString), RTSTR_TAG)
872
873/**
874 * Translate a UTF-8 string into a Latin-1 allocating the result buffer (custom
875 * tag).
876 *
877 * @returns iprt status code.
878 * @param pszString UTF-8 string to convert.
879 * @param ppszString Receives pointer to the allocated Latin-1 string.
880 * The returned string must be freed using RTStrFree().
881 * @param pszTag Allocation tag used for statistics and such.
882 */
883RTDECL(int) RTStrToLatin1Tag(const char *pszString, char **ppszString, const char *pszTag);
884
/**
 * Converts a UTF-8 string to Latin-1, allocating the result buffer if
 * requested (default tag).
 *
 * Forwards to RTStrToLatin1ExTag() with RTSTR_TAG as the allocation tag.
 *
 * @returns iprt status code.
 * @param   pszString   The UTF-8 string to convert.
 * @param   cchString   Maximum number of chars (the type) of pszString to
 *                      convert.  The conversion stops at cchString or at the
 *                      string terminator ('\\0').  Use RTSTR_MAX to convert
 *                      the entire string.
 * @param   ppsz        If cch is non-zero, this must point either to a pointer
 *                      to a buffer of the specified size or to a NULL pointer.
 *                      If *ppsz is NULL or cch is zero, a buffer of at least
 *                      cch items is allocated to hold the result.  A buffer
 *                      that was requested this way must be freed using
 *                      RTStrFree().
 * @param   cch         The buffer size in bytes, terminator included.
 * @param   pcch        Where to store the length of the converted string,
 *                      terminator excluded.  (Optional)
 *
 *                      This may be set under some error conditions, but only
 *                      for VERR_BUFFER_OVERFLOW and VERR_NO_STR_MEMORY does it
 *                      hold a valid string length that can be used to resize
 *                      the buffer.
 */
#define RTStrToLatin1Ex(pszString, cchString, ppsz, cch, pcch) \
    RTStrToLatin1ExTag((pszString), \
                       (cchString), \
                       (ppsz), \
                       (cch), \
                       (pcch), \
                       RTSTR_TAG)
913
914/**
915 * Translates pszString from UTF-8 to Latin1, allocating the result buffer if
916 * requested (custom tag).
917 *
918 * @returns iprt status code.
919 * @param pszString UTF-8 string to convert.
920 * @param cchString The maximum size in chars (the type) to convert.
921 * The conversion stops when it reaches cchString or
922 * the string terminator ('\\0'). Use RTSTR_MAX to
923 * translate the entire string.
924 * @param ppsz If cch is non-zero, this must either be pointing to
925 * pointer to a buffer of the specified size, or
926 * pointer to a NULL pointer. If *ppsz is NULL or cch
927 * is zero a buffer of at least cch items will be
928 * allocated to hold the translated string. If a
929 * buffer was requested it must be freed using
930 * RTStrFree().
931 * @param cch The buffer size in bytes. This includes the
932 * terminator.
933 * @param pcch Where to store the length of the translated string,
934 * excluding the terminator. (Optional)
935 *
936 * This may be set under some error conditions,
937 * however, only for VERR_BUFFER_OVERFLOW and
938 * VERR_NO_STR_MEMORY will it contain a valid string
939 * length that can be used to resize the buffer.
940 * @param pszTag Allocation tag used for statistics and such.
941 */
942RTDECL(int) RTStrToLatin1ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);
943
944
/**
 * Converts a Latin-1 string to UTF-8, allocating the buffer for the result
 * (default tag).
 *
 * Forwards to RTLatin1ToUtf8Tag() with RTSTR_TAG as the allocation tag.
 *
 * @returns iprt status code.
 * @param   pszString   The Latin-1 string to convert.
 * @param   ppszString  Receives the pointer to the allocated UTF-8 string on
 *                      success, and is always set to NULL on failure.
 *                      The returned pointer must be freed using RTStrFree().
 */
#define RTLatin1ToUtf8(pszString, ppszString) \
    RTLatin1ToUtf8Tag((pszString), (ppszString), RTSTR_TAG)
956
957/**
958 * Translate a Latin-1 string into a UTF-8 allocating the result buffer.
959 *
960 * @returns iprt status code.
961 * @param pszString Latin-1 string to convert.
962 * @param ppszString Receives pointer of allocated UTF-8 string on
963 * success, and is always set to NULL on failure.
964 * The returned pointer must be freed using RTStrFree().
965 * @param pszTag Allocation tag used for statistics and such.
966 */
967RTDECL(int) RTLatin1ToUtf8Tag(const char *pszString, char **ppszString, const char *pszTag);
968
/**
 * Converts Latin-1 to UTF-8, writing to a caller supplied buffer or to a
 * fittingly sized buffer allocated by the function (default tag).
 *
 * Forwards to RTLatin1ToUtf8ExTag() with RTSTR_TAG as the allocation tag.
 *
 * @returns iprt status code.
 * @param   pszString   The Latin-1 string to convert.
 * @param   cchString   The number of Latin-1 characters of pszString to
 *                      convert.  The conversion stops at cchString or at the
 *                      terminator ('\\0').  Use RTSTR_MAX to convert the
 *                      entire string.
 * @param   ppsz        If cch is non-zero, this must point either to a pointer
 *                      to a buffer of the specified size or to a NULL pointer.
 *                      If *ppsz is NULL or cch is zero, a buffer of at least
 *                      cch chars is allocated to hold the result.  A buffer
 *                      that was requested this way must be freed using
 *                      RTStrFree().
 * @param   cch         The buffer size in chars (the type), terminator
 *                      included.
 * @param   pcch        Where to store the length of the converted string,
 *                      terminator excluded.  (Optional)
 *
 *                      This may be set under some error conditions, but only
 *                      for VERR_BUFFER_OVERFLOW and VERR_NO_STR_MEMORY does it
 *                      hold a valid string length that can be used to resize
 *                      the buffer.
 */
#define RTLatin1ToUtf8Ex(pszString, cchString, ppsz, cch, pcch) \
    RTLatin1ToUtf8ExTag((pszString), \
                        (cchString), \
                        (ppsz), \
                        (cch), \
                        (pcch), \
                        RTSTR_TAG)
997
998/**
999 * Translates Latin1 to UTF-8 using buffer provided by the caller or a fittingly
1000 * sized buffer allocated by the function (custom tag).
1001 *
1002 * @returns iprt status code.
1003 * @param pszString The Latin1 string to convert.
1004 * @param cchString The number of Latin1 characters to translate from
1005 * pszString. The translation will stop when
1006 * reaching cchString or the terminator ('\\0'). Use
1007 * RTSTR_MAX to translate the entire string.
1008 * @param ppsz If cch is non-zero, this must either be pointing to
1009 * a pointer to a buffer of the specified size, or
1010 * pointer to a NULL pointer. If *ppsz is NULL or cch
1011 * is zero a buffer of at least cch chars will be
1012 * allocated to hold the translated string. If a
1013 * buffer was requested it must be freed using
1014 * RTStrFree().
1015 * @param cch The buffer size in chars (the type). This includes
1016 * the terminator.
1017 * @param pcch Where to store the length of the translated string,
1018 * excluding the terminator. (Optional)
1019 *
1020 * This may be set under some error conditions,
1021 * however, only for VERR_BUFFER_OVERFLOW and
1022 * VERR_NO_STR_MEMORY will it contain a valid string
1023 * length that can be used to resize the buffer.
1024 * @param pszTag Allocation tag used for statistics and such.
1025 */
1026RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);
1027
1028/**
1029 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).
1030 *
1031 * The primary purpose of this function is to help allocate buffers for
1032 * RTLatin1ToUtf8() of the correct size. For most other purposes
1033 * RTLatin1ToUtf8Ex() should be used.
1034 *
1035 * @returns Number of chars (bytes).
1036 * @returns 0 if the string was incorrectly encoded.
1037 * @param psz The Latin-1 string.
1038 */
1039RTDECL(size_t) RTLatin1CalcUtf8Len(const char *psz);
1040
1041/**
1042 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).
1043 *
1044 * @returns iprt status code.
1045 * @param psz The string.
1046 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
1047 * @param pcch Where to store the string length (in bytes). Optional.
1048 * This is undefined on failure.
1049 */
1050RTDECL(int) RTLatin1CalcUtf8LenEx(const char *psz, size_t cch, size_t *pcch);
1051
1052/**
1053 * Get the unicode code point at the given string position.
1054 *
1055 * @returns unicode code point.
1056 * @returns RTUNICP_INVALID if the encoding is invalid.
1057 * @param psz The string.
1058 */
1059RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
1060
1061/**
1062 * Get the unicode code point at the given string position.
1063 *
1064 * @returns iprt status code
1065 * @returns VERR_INVALID_UTF8_ENCODING if the encoding is invalid.
1066 * @param ppsz The string cursor.
1067 * This is advanced one character forward on failure.
1068 * @param pCp Where to store the unicode code point.
1069 * Stores RTUNICP_INVALID if the encoding is invalid.
1070 */
1071RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
1072
1073/**
1074 * Get the unicode code point at the given string position for a string of a
1075 * given length.
1076 *
1077 * @returns iprt status code
1078 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid.
1079 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
1080 *
1081 * @param ppsz The string.
1082 * @param pcch Pointer to the length of the string. This will be
1083 * decremented by the size of the code point.
1084 * @param pCp Where to store the unicode code point.
1085 * Stores RTUNICP_INVALID if the encoding is invalid.
1086 */
1087RTDECL(int) RTStrGetCpNExInternal(const char **ppsz, size_t *pcch, PRTUNICP pCp);
1088
1089/**
1090 * Put the unicode code point at the given string position
1091 * and return the pointer to the char following it.
1092 *
1093 * This function will not consider anything at or following the
1094 * buffer area pointed to by psz. It is therefore not suitable for
1095 * inserting code points into a string, only appending/overwriting.
1096 *
1097 * @returns pointer to the char following the written code point.
1098 * @param psz The string.
1099 * @param CodePoint The code point to write.
1100 * This should not be RTUNICP_INVALID or any other
1101 * character out of the UTF-8 range.
1102 *
1103 * @remark This is a worker function for RTStrPutCp().
1104 *
1105 */
1106RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
1107
1108/**
1109 * Get the unicode code point at the given string position.
1110 *
1111 * @returns unicode code point.
1112 * @returns RTUNICP_INVALID if the encoding is invalid.
1113 * @param psz The string.
1114 *
1115 * @remark We optimize this operation by using an inline function for
1116 * the most frequent and simplest sequence, the rest is
1117 * handled by RTStrGetCpInternal().
1118 */
1119DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
1120{
1121 const unsigned char uch = *(const unsigned char *)psz;
1122 if (!(uch & RT_BIT(7)))
1123 return uch;
1124 return RTStrGetCpInternal(psz);
1125}
1126
1127/**
1128 * Get the unicode code point at the given string position.
1129 *
1130 * @returns iprt status code.
1131 * @param ppsz Pointer to the string pointer. This will be updated to
1132 * point to the char following the current code point.
1133 * This is advanced one character forward on failure.
1134 * @param pCp Where to store the code point.
1135 * RTUNICP_INVALID is stored here on failure.
1136 *
1137 * @remark We optimize this operation by using an inline function for
1138 * the most frequent and simplest sequence, the rest is
1139 * handled by RTStrGetCpExInternal().
1140 */
1141DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
1142{
1143 const unsigned char uch = **(const unsigned char **)ppsz;
1144 if (!(uch & RT_BIT(7)))
1145 {
1146 (*ppsz)++;
1147 *pCp = uch;
1148 return VINF_SUCCESS;
1149 }
1150 return RTStrGetCpExInternal(ppsz, pCp);
1151}
1152
1153/**
1154 * Get the unicode code point at the given string position for a string of a
1155 * given maximum length.
1156 *
1157 * @returns iprt status code.
1158 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid.
1159 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
1160 *
1161 * @param ppsz Pointer to the string pointer. This will be updated to
1162 * point to the char following the current code point.
1163 * @param pcch Pointer to the maximum string length. This will be
1164 * decremented by the size of the code point found.
1165 * @param pCp Where to store the code point.
1166 * RTUNICP_INVALID is stored here on failure.
1167 *
1168 * @remark We optimize this operation by using an inline function for
1169 * the most frequent and simplest sequence, the rest is
1170 * handled by RTStrGetCpNExInternal().
1171 */
1172DECLINLINE(int) RTStrGetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)
1173{
1174 if (RT_LIKELY(*pcch != 0))
1175 {
1176 const unsigned char uch = **(const unsigned char **)ppsz;
1177 if (!(uch & RT_BIT(7)))
1178 {
1179 (*ppsz)++;
1180 (*pcch)--;
1181 *pCp = uch;
1182 return VINF_SUCCESS;
1183 }
1184 }
1185 return RTStrGetCpNExInternal(ppsz, pcch, pCp);
1186}
1187
1188/**
1189 * Get the UTF-8 size in characters of a given Unicode code point.
1190 *
1191 * The code point is expected to be a valid Unicode one, but not necessarily in
1192 * the range supported by UTF-8.
1193 *
1194 * @returns The number of chars (bytes) required to encode the code point, or
1195 * zero if there is no UTF-8 encoding.
1196 * @param CodePoint The unicode code point.
1197 */
1198DECLINLINE(size_t) RTStrCpSize(RTUNICP CodePoint)
1199{
1200 if (CodePoint < 0x00000080)
1201 return 1;
1202 if (CodePoint < 0x00000800)
1203 return 2;
1204 if (CodePoint < 0x00010000)
1205 return 3;
1206#ifdef RT_USE_RTC_3629
1207 if (CodePoint < 0x00011000)
1208 return 4;
1209#else
1210 if (CodePoint < 0x00200000)
1211 return 4;
1212 if (CodePoint < 0x04000000)
1213 return 5;
1214 if (CodePoint < 0x7fffffff)
1215 return 6;
1216#endif
1217 return 0;
1218}
1219
1220/**
1221 * Put the unicode code point at the given string position
1222 * and return the pointer to the char following it.
1223 *
1224 * This function will not consider anything at or following the
1225 * buffer area pointed to by psz. It is therefore not suitable for
1226 * inserting code points into a string, only appending/overwriting.
1227 *
1228 * @returns pointer to the char following the written code point.
1229 * @param psz The string.
1230 * @param CodePoint The code point to write.
1231 * This should not be RTUNICP_INVALID or any other
1232 * character out of the UTF-8 range.
1233 *
1234 * @remark We optimize this operation by using an inline function for
1235 * the most frequent and simplest sequence, the rest is
1236 * handled by RTStrPutCpInternal().
1237 */
1238DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
1239{
1240 if (CodePoint < 0x80)
1241 {
1242 *psz++ = (unsigned char)CodePoint;
1243 return psz;
1244 }
1245 return RTStrPutCpInternal(psz, CodePoint);
1246}
1247
1248/**
1249 * Skips ahead, past the current code point.
1250 *
1251 * @returns Pointer to the char after the current code point.
1252 * @param psz Pointer to the current code point.
1253 * @remark This will not move the next valid code point, only past the current one.
1254 */
1255DECLINLINE(char *) RTStrNextCp(const char *psz)
1256{
1257 RTUNICP Cp;
1258 RTStrGetCpEx(&psz, &Cp);
1259 return (char *)psz;
1260}
1261
1262/**
1263 * Skips back to the previous code point.
1264 *
1265 * @returns Pointer to the char before the current code point.
1266 * @returns pszStart on failure.
1267 * @param pszStart Pointer to the start of the string.
1268 * @param psz Pointer to the current code point.
1269 */
1270RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
1271
1272/**
1273 * Get the unicode code point at the given string position.
1274 *
1275 * @returns unicode code point.
1276 * @returns RTUNICP_INVALID if the encoding is invalid.
1277 * @param psz The string.
1278 */
1279DECLINLINE(RTUNICP) RTLatin1GetCp(const char *psz)
1280{
1281 return *(const unsigned char *)psz;
1282}
1283
1284/**
1285 * Get the unicode code point at the given string position.
1286 *
1287 * @returns iprt status code.
1288 * @param ppsz Pointer to the string pointer. This will be updated to
1289 * point to the char following the current code point.
1290 * This is advanced one character forward on failure.
1291 * @param pCp Where to store the code point.
1292 * RTUNICP_INVALID is stored here on failure.
1293 *
1294 * @remark We optimize this operation by using an inline function for
1295 * the most frequent and simplest sequence, the rest is
1296 * handled by RTStrGetCpExInternal().
1297 */
1298DECLINLINE(int) RTLatin1GetCpEx(const char **ppsz, PRTUNICP pCp)
1299{
1300 const unsigned char uch = **(const unsigned char **)ppsz;
1301 (*ppsz)++;
1302 *pCp = uch;
1303 return VINF_SUCCESS;
1304}
1305
1306/**
1307 * Get the unicode code point at the given string position for a string of a
1308 * given maximum length.
1309 *
1310 * @returns iprt status code.
1311 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
1312 *
1313 * @param ppsz Pointer to the string pointer. This will be updated to
1314 * point to the char following the current code point.
1315 * @param pcch Pointer to the maximum string length. This will be
1316 * decremented by the size of the code point found.
1317 * @param pCp Where to store the code point.
1318 * RTUNICP_INVALID is stored here on failure.
1319 */
1320DECLINLINE(int) RTLatin1GetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)
1321{
1322 if (RT_LIKELY(*pcch != 0))
1323 {
1324 const unsigned char uch = **(const unsigned char **)ppsz;
1325 (*ppsz)++;
1326 (*pcch)--;
1327 *pCp = uch;
1328 return VINF_SUCCESS;
1329 }
1330 *pCp = RTUNICP_INVALID;
1331 return VERR_END_OF_STRING;
1332}
1333
1334/**
1335 * Get the Latin-1 size in characters of a given Unicode code point.
1336 *
1337 * The code point is expected to be a valid Unicode one, but not necessarily in
1338 * the range supported by Latin-1.
1339 *
1340 * @returns the size in characters, or zero if there is no Latin-1 encoding
1341 */
1342DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)
1343{
1344 if (CodePoint < 0x100)
1345 return 1;
1346 return 0;
1347}
1348
1349/**
1350 * Put the unicode code point at the given string position
1351 * and return the pointer to the char following it.
1352 *
1353 * This function will not consider anything at or following the
1354 * buffer area pointed to by psz. It is therefore not suitable for
1355 * inserting code points into a string, only appending/overwriting.
1356 *
1357 * @returns pointer to the char following the written code point.
1358 * @param psz The string.
1359 * @param CodePoint The code point to write.
1360 * This should not be RTUNICP_INVALID or any other
1361 * character out of the Latin-1 range.
1362 */
1363DECLINLINE(char *) RTLatin1PutCp(char *psz, RTUNICP CodePoint)
1364{
1365 AssertReturn(CodePoint < 0x100, NULL);
1366 *psz++ = (unsigned char)CodePoint;
1367 return psz;
1368}
1369
1370/**
1371 * Skips ahead, past the current code point.
1372 *
1373 * @returns Pointer to the char after the current code point.
1374 * @param psz Pointer to the current code point.
1375 * @remark This will not move the next valid code point, only past the current one.
1376 */
1377DECLINLINE(char *) RTLatin1NextCp(const char *psz)
1378{
1379 psz++;
1380 return (char *)psz;
1381}
1382
1383/**
1384 * Skips back to the previous code point.
1385 *
1386 * @returns Pointer to the char before the current code point.
1387 * @returns pszStart on failure.
1388 * @param pszStart Pointer to the start of the string.
1389 * @param psz Pointer to the current code point.
1390 */
1391DECLINLINE(char *) RTLatin1PrevCp(const char *psz)
1392{
1393 psz--;
1394 return (char *)psz;
1395}
1396
1397
1398
1399#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
1400#define DECLARED_FNRTSTROUTPUT
1401/**
1402 * Output callback.
1403 *
1404 * @returns number of bytes written.
1405 * @param pvArg User argument.
1406 * @param pachChars Pointer to an array of utf-8 characters.
1407 * @param cbChars Number of bytes in the character array pointed to by pachChars.
1408 */
1409typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
1410/** Pointer to callback function. */
1411typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
1412#endif
1413
/** Format flags.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
/* General formatting flags. */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100
#define RTSTR_F_THOUSAND_SEP    0x0200

/* Bit width flags, one per size handled by RTSTR_GET_BIT_FLAG. */
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/* Covers all five bit width flags above. */
#define RTSTR_F_BIT_MASK        0xf800
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit width flag (RTSTR_F_8BIT thru RTSTR_F_128BIT) matching the
 * size of the specified type.
 *
 * Evaluates to 0 when the type is not 1, 2, 4, 8 or 16 bytes big.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    (  sizeof(type) ==  4 ? RTSTR_F_32BIT \
     : sizeof(type) ==  8 ? RTSTR_F_64BIT \
     : sizeof(type) ==  2 ? RTSTR_F_16BIT \
     : sizeof(type) ==  1 ? RTSTR_F_8BIT \
     : sizeof(type) == 16 ? RTSTR_F_128BIT \
     : 0)
1447
1448
1449/**
1450 * Callback to format non-standard format specifiers.
1451 *
1452 * @returns The number of bytes formatted.
1453 * @param pvArg Formatter argument.
1454 * @param pfnOutput Pointer to output function.
1455 * @param pvArgOutput Argument for the output function.
1456 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
1457 * after the format specifier.
1458 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
1459 * @param cchWidth Format Width. -1 if not specified.
1460 * @param cchPrecision Format Precision. -1 if not specified.
1461 * @param fFlags Flags (RTSTR_F_*).
1462 * @param chArgSize The argument size specifier, 'l' or 'L'.
1463 */
1464typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
1465 const char **ppszFormat, va_list *pArgs, int cchWidth,
1466 int cchPrecision, unsigned fFlags, char chArgSize);
1467/** Pointer to a FNSTRFORMAT() function. */
1468typedef FNSTRFORMAT *PFNSTRFORMAT;
1469
1470
1471/**
1472 * Partial implementation of a printf like formatter.
1473 * It doesn't do everything correctly, and there is no floating point support.
1474 * However, it supports custom formats by the means of a format callback.
1475 *
1476 * @returns number of bytes formatted.
1477 * @param pfnOutput Output worker.
1478 * Called in two ways. Normally with a string and its length.
1479 * For termination, it's called with NULL for string, 0 for length.
1480 * @param pvArgOutput Argument to the output worker.
1481 * @param pfnFormat Custom format worker.
1482 * @param pvArgFormat Argument to the format worker.
1483 * @param pszFormat Format string pointer.
1484 * @param InArgs Argument list.
1485 */
1486RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list InArgs);
1487
1488/**
1489 * Partial implementation of a printf like formatter.
1490 * It doesn't do everything correctly, and there is no floating point support.
1491 * However, it supports custom formats by the means of a format callback.
1492 *
1493 * @returns number of bytes formatted.
1494 * @param pfnOutput Output worker.
1495 * Called in two ways. Normally with a string and its length.
1496 * For termination, it's called with NULL for string, 0 for length.
1497 * @param pvArgOutput Argument to the output worker.
1498 * @param pfnFormat Custom format worker.
1499 * @param pvArgFormat Argument to the format worker.
1500 * @param pszFormat Format string.
1501 * @param ... Argument list.
1502 */
1503RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
1504
1505/**
1506 * Formats an integer number according to the parameters.
1507 *
1508 * @returns Length of the formatted number.
1509 * @param psz Pointer to output string buffer of sufficient size.
1510 * @param u64Value Value to format.
1511 * @param uiBase Number representation base.
1512 * @param cchWidth Width.
1513 * @param cchPrecision Precision.
1514 * @param fFlags Flags (RTSTR_F_*).
1515 */
1516RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
1517
1518
1519/**
1520 * Callback for formatting a type.
1521 *
1522 * This is registered using the RTStrFormatTypeRegister function and will
1523 * be called during string formatting to handle the specified %R[type].
1524 * The argument for this format type is assumed to be a pointer and it's
1525 * passed in the @a pvValue argument.
1526 *
1527 * @returns Length of the formatted output.
1528 * @param pfnOutput Output worker.
1529 * @param pvArgOutput Argument to the output worker.
1530 * @param pszType The type name.
1531 * @param pvValue The argument value.
1532 * @param cchWidth Width.
1533 * @param cchPrecision Precision.
1534 * @param fFlags Flags (RTSTR_F_*).
1535 * @param pvUser The user argument.
1536 */
1537typedef DECLCALLBACK(size_t) FNRTSTRFORMATTYPE(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
1538 const char *pszType, void const *pvValue,
1539 int cchWidth, int cchPrecision, unsigned fFlags,
1540 void *pvUser);
1541/** Pointer to a FNRTSTRFORMATTYPE. */
1542typedef FNRTSTRFORMATTYPE *PFNRTSTRFORMATTYPE;
1543
1544
1545/**
1546 * Register a format handler for a type.
1547 *
1548 * The format handler is used to handle '%R[type]' format types, where the argument
1549 * in the vector is a pointer value (a bit restrictive, but keeps it simple).
1550 *
1551 * The caller must ensure that no other thread will be making use of any of
1552 * the dynamic formatting type facilities simultaneously with this call.
1553 *
1554 * @returns IPRT status code.
1555 * @retval VINF_SUCCESS on success.
1556 * @retval VERR_ALREADY_EXISTS if the type has already been registered.
1557 * @retval VERR_TOO_MANY_OPEN_FILES if all the type slots have been allocated already.
1558 *
1559 * @param pszType The type name.
1560 * @param pfnHandler The handler address. See FNRTSTRFORMATTYPE for details.
1561 * @param pvUser The user argument to pass to the handler. See RTStrFormatTypeSetUser
1562 * for how to update this later.
1563 */
1564RTDECL(int) RTStrFormatTypeRegister(const char *pszType, PFNRTSTRFORMATTYPE pfnHandler, void *pvUser);
1565
1566/**
1567 * Deregisters a format type.
1568 *
1569 * The caller must ensure that no other thread will be making use of any of
1570 * the dynamic formatting type facilities simultaneously with this call.
1571 *
1572 * @returns IPRT status code.
1573 * @retval VINF_SUCCESS on success.
1574 * @retval VERR_FILE_NOT_FOUND if not found.
1575 *
1576 * @param pszType The type to deregister.
1577 */
1578RTDECL(int) RTStrFormatTypeDeregister(const char *pszType);
1579
1580/**
1581 * Sets the user argument for a type.
1582 *
1583 * This can be used if a user argument needs relocating in GC.
1584 *
1585 * @returns IPRT status code.
1586 * @retval VINF_SUCCESS on success.
1587 * @retval VERR_FILE_NOT_FOUND if not found.
1588 *
1589 * @param pszType The type to update.
1590 * @param pvUser The new user argument value.
1591 */
1592RTDECL(int) RTStrFormatTypeSetUser(const char *pszType, void *pvUser);
1593
1594
1595/**
1596 * String printf.
1597 *
1598 * @returns The length of the returned string (in pszBuffer).
1599 * @param pszBuffer Output buffer.
1600 * @param cchBuffer Size of the output buffer.
1601 * @param pszFormat The format string.
1602 * @param args The format argument.
1603 */
1604RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
1605
1606/**
1607 * String printf.
1608 *
1609 * @returns The length of the returned string (in pszBuffer).
1610 * @param pszBuffer Output buffer.
1611 * @param cchBuffer Size of the output buffer.
1612 * @param pszFormat The format string.
1613 * @param ... The format argument.
1614 */
1615RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
1616
1617
1618/**
1619 * String printf with custom formatting.
1620 *
1621 * @returns The length of the returned string (in pszBuffer).
1622 * @param pfnFormat Pointer to handler function for the custom formats.
1623 * @param pvArg Argument to the pfnFormat function.
1624 * @param pszBuffer Output buffer.
1625 * @param cchBuffer Size of the output buffer.
1626 * @param pszFormat The format string.
1627 * @param args The format argument.
1628 */
1629RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
1630
1631/**
1632 * String printf with custom formatting.
1633 *
1634 * @returns The length of the returned string (in pszBuffer).
1635 * @param pfnFormat Pointer to handler function for the custom formats.
1636 * @param pvArg Argument to the pfnFormat function.
1637 * @param pszBuffer Output buffer.
1638 * @param cchBuffer Size of the output buffer.
1639 * @param pszFormat The format string.
1640 * @param ... The format argument.
1641 */
1642RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
1643
1644
/**
 * Allocating string printf taking a va_list (default tag).
 *
 * Forwards to RTStrAPrintfVTag() with RTSTR_TAG as the allocation tag.
 *
 * @returns The length of the string in the returned *ppszBuffer.
 * @returns -1 on failure.
 * @param   ppszBuffer  Where to store the pointer to the allocated output
 *                      buffer.  The buffer should be freed using RTStrFree().
 *                      On failure *ppszBuffer will be set to NULL.
 * @param   pszFormat   The format string.
 * @param   args        The format argument.
 */
#define RTStrAPrintfV(ppszBuffer, pszFormat, args) \
    RTStrAPrintfVTag((ppszBuffer), (pszFormat), (args), RTSTR_TAG)
1657
1658/**
1659 * Allocating string printf (custom tag).
1660 *
1661 * @returns The length of the string in the returned *ppszBuffer.
1662 * @returns -1 on failure.
1663 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
1664 * The buffer should be freed using RTStrFree().
1665 * On failure *ppszBuffer will be set to NULL.
1666 * @param pszFormat The format string.
1667 * @param args The format argument.
1668 * @param pszTag Allocation tag used for statistics and such.
1669 */
1670RTDECL(int) RTStrAPrintfVTag(char **ppszBuffer, const char *pszFormat, va_list args, const char *pszTag);
1671
1672/**
1673 * Allocating string printf.
1674 *
1675 * @returns The length of the string in the returned *ppszBuffer.
1676 * @returns -1 on failure.
1677 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
1678 * The buffer should be freed using RTStrFree().
1679 * On failure *ppszBuffer will be set to NULL.
1680 * @param pszFormat The format string.
1681 * @param ... The format argument.
1682 */
1683DECLINLINE(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...)
1684{
1685 int cbRet;
1686 va_list va;
1687 va_start(va, pszFormat);
1688 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, RTSTR_TAG);
1689 va_end(va);
1690 return cbRet;
1691}
1692
1693/**
1694 * Allocating string printf (custom tag).
1695 *
1696 * @returns The length of the string in the returned *ppszBuffer.
1697 * @returns -1 on failure.
1698 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
1699 * The buffer should be freed using RTStrFree().
1700 * On failure *ppszBuffer will be set to NULL.
1701 * @param pszTag Allocation tag used for statistics and such.
1702 * @param pszFormat The format string.
1703 * @param ... The format argument.
1704 */
1705DECLINLINE(int) RTStrAPrintfTag(char **ppszBuffer, const char *pszTag, const char *pszFormat, ...)
1706{
1707 int cbRet;
1708 va_list va;
1709 va_start(va, pszFormat);
1710 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, pszTag);
1711 va_end(va);
1712 return cbRet;
1713}
1714
1715/**
1716 * Allocating string printf, version 2.
1717 *
1718 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of
1719 * memory.
1720 * @param pszFormat The format string.
1721 * @param args The format argument.
1722 */
1723#define RTStrAPrintf2V(pszFormat, args) RTStrAPrintf2VTag((pszFormat), (args), RTSTR_TAG)
1724
1725/**
1726 * Allocating string printf, version 2.
1727 *
1728 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of
1729 * memory.
1730 * @param pszFormat The format string.
1731 * @param args The format argument.
1732 * @param pszTag Allocation tag used for statistics and such.
1733 */
1734RTDECL(char *) RTStrAPrintf2VTag(const char *pszFormat, va_list args, const char *pszTag);
1735
1736/**
1737 * Allocating string printf, version 2 (default tag).
1738 *
1739 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of
1740 * memory.
1741 * @param pszFormat The format string.
1742 * @param ... The format argument.
1743 */
1744DECLINLINE(char *) RTStrAPrintf2(const char *pszFormat, ...)
1745{
1746 char *pszRet;
1747 va_list va;
1748 va_start(va, pszFormat);
1749 pszRet = RTStrAPrintf2VTag(pszFormat, va, RTSTR_TAG);
1750 va_end(va);
1751 return pszRet;
1752}
1753
1754/**
1755 * Allocating string printf, version 2 (custom tag).
1756 *
1757 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of
1758 * memory.
1759 * @param pszTag Allocation tag used for statistics and such.
1760 * @param pszFormat The format string.
1761 * @param ... The format argument.
1762 */
1763DECLINLINE(char *) RTStrAPrintf2Tag(const char *pszTag, const char *pszFormat, ...)
1764{
1765 char *pszRet;
1766 va_list va;
1767 va_start(va, pszFormat);
1768 pszRet = RTStrAPrintf2VTag(pszFormat, va, pszTag);
1769 va_end(va);
1770 return pszRet;
1771}
1772
1773/**
1774 * Strips blankspaces from both ends of the string.
1775 *
1776 * @returns Pointer to first non-blank char in the string.
1777 * @param psz The string to strip.
1778 */
1779RTDECL(char *) RTStrStrip(char *psz);
1780
1781/**
1782 * Strips blankspaces from the start of the string.
1783 *
1784 * @returns Pointer to first non-blank char in the string.
1785 * @param psz The string to strip.
1786 */
1787RTDECL(char *) RTStrStripL(const char *psz);
1788
1789/**
1790 * Strips blankspaces from the end of the string.
1791 *
1792 * @returns psz.
1793 * @param psz The string to strip.
1794 */
1795RTDECL(char *) RTStrStripR(char *psz);
1796
1797/**
1798 * String copy with overflow handling.
1799 *
1800 * @retval VINF_SUCCESS on success.
1801 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The
1802 * buffer will contain as much of the string as it can hold, fully
1803 * terminated.
1804 *
1805 * @param pszDst The destination buffer.
1806 * @param cbDst The size of the destination buffer (in bytes).
1807 * @param pszSrc The source string. NULL is not OK.
1808 */
1809RTDECL(int) RTStrCopy(char *pszDst, size_t cbDst, const char *pszSrc);
1810
1811/**
1812 * String copy with overflow handling.
1813 *
1814 * @retval VINF_SUCCESS on success.
1815 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The
1816 * buffer will contain as much of the string as it can hold, fully
1817 * terminated.
1818 *
1819 * @param pszDst The destination buffer.
1820 * @param cbDst The size of the destination buffer (in bytes).
1821 * @param pszSrc The source string. NULL is not OK.
1822 * @param cchSrcMax The maximum number of chars (not code points) to
1823 * copy from the source string, not counting the
1824 * terminator as usual.
1825 */
1826RTDECL(int) RTStrCopyEx(char *pszDst, size_t cbDst, const char *pszSrc, size_t cchSrcMax);
1827
1828/**
1829 * Performs a case sensitive string compare between two UTF-8 strings.
1830 *
1831 * Encoding errors are ignored by the current implementation. So, the only
1832 * difference between this and the CRT strcmp function is the handling of
1833 * NULL arguments.
1834 *
1835 * @returns < 0 if the first string less than the second string.
1836 * @returns 0 if the first string identical to the second string.
1837 * @returns > 0 if the first string greater than the second string.
1838 * @param psz1 First UTF-8 string. Null is allowed.
1839 * @param psz2 Second UTF-8 string. Null is allowed.
1840 */
1841RTDECL(int) RTStrCmp(const char *psz1, const char *psz2);
1842
1843/**
1844 * Performs a case sensitive string compare between two UTF-8 strings, given
1845 * a maximum string length.
1846 *
1847 * Encoding errors are ignored by the current implementation. So, the only
1848 * difference between this and the CRT strncmp function is the handling of
1849 * NULL arguments.
1850 *
1851 * @returns < 0 if the first string less than the second string.
1852 * @returns 0 if the first string identical to the second string.
1853 * @returns > 0 if the first string greater than the second string.
1854 * @param psz1 First UTF-8 string. Null is allowed.
1855 * @param psz2 Second UTF-8 string. Null is allowed.
1856 * @param cchMax The maximum string length
1857 */
1858RTDECL(int) RTStrNCmp(const char *psz1, const char *psz2, size_t cchMax);
1859
1860/**
1861 * Performs a case insensitive string compare between two UTF-8 strings.
1862 *
1863 * This is a simplified compare, as only the simplified lower/upper case folding
1864 * specified by the unicode specs are used. It does not consider character pairs
1865 * as they are used in some languages, just simple upper & lower case compares.
1866 *
1867 * The result is the difference between the mismatching codepoints after they
1868 * both have been lower cased.
1869 *
1870 * If the string encoding is invalid the function will assert (strict builds)
1871 * and use RTStrCmp for the remainder of the string.
1872 *
1873 * @returns < 0 if the first string less than the second string.
1874 * @returns 0 if the first string identical to the second string.
1875 * @returns > 0 if the first string greater than the second string.
1876 * @param psz1 First UTF-8 string. Null is allowed.
1877 * @param psz2 Second UTF-8 string. Null is allowed.
1878 */
1879RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
1880
1881/**
1882 * Performs a case insensitive string compare between two UTF-8 strings, given a
1883 * maximum string length.
1884 *
1885 * This is a simplified compare, as only the simplified lower/upper case folding
1886 * specified by the unicode specs are used. It does not consider character pairs
1887 * as they are used in some languages, just simple upper & lower case compares.
1888 *
1889 * The result is the difference between the mismatching codepoints after they
1890 * both have been lower cased.
1891 *
1892 * If the string encoding is invalid the function will assert (strict builds)
1893 * and use RTStrCmp for the remainder of the string.
1894 *
1895 * @returns < 0 if the first string less than the second string.
1896 * @returns 0 if the first string identical to the second string.
1897 * @returns > 0 if the first string greater than the second string.
1898 * @param psz1 First UTF-8 string. Null is allowed.
1899 * @param psz2 Second UTF-8 string. Null is allowed.
1900 * @param cchMax Maximum string length
1901 */
1902RTDECL(int) RTStrNICmp(const char *psz1, const char *psz2, size_t cchMax);
1903
1904/**
1905 * Locates a case sensitive substring.
1906 *
1907 * If any of the two strings are NULL, then NULL is returned. If the needle is
1908 * an empty string, then the haystack is returned (i.e. matches anything).
1909 *
1910 * @returns Pointer to the first occurrence of the substring if found, NULL if
1911 * not.
1912 *
1913 * @param pszHaystack The string to search.
1914 * @param pszNeedle The substring to search for.
1915 *
1916 * @remarks The difference between this and strstr is the handling of NULL
1917 * pointers.
1918 */
1919RTDECL(char *) RTStrStr(const char *pszHaystack, const char *pszNeedle);
1920
1921/**
1922 * Locates a case insensitive substring.
1923 *
1924 * If any of the two strings are NULL, then NULL is returned. If the needle is
1925 * an empty string, then the haystack is returned (i.e. matches anything).
1926 *
1927 * @returns Pointer to the first occurrence of the substring if found, NULL if
1928 * not.
1929 *
1930 * @param pszHaystack The string to search.
1931 * @param pszNeedle The substring to search for.
1932 *
1933 */
1934RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle);
1935
1936/**
1937 * Converts the string to lower case.
1938 *
1939 * @returns Pointer to the converted string.
1940 * @param psz The string to convert.
1941 */
1942RTDECL(char *) RTStrToLower(char *psz);
1943
1944/**
1945 * Converts the string to upper case.
1946 *
1947 * @returns Pointer to the converted string.
1948 * @param psz The string to convert.
1949 */
1950RTDECL(char *) RTStrToUpper(char *psz);
1951
1952/**
1953 * Find the length of a zero-terminated byte string, given
1954 * a max string length.
1955 *
1956 * See also RTStrNLenEx.
1957 *
1958 * @returns The string length or cchMax. The returned length does not include
1959 * the zero terminator if it was found.
1960 *
1961 * @param pszString The string.
1962 * @param cchMax The max string length.
1963 */
1964RTDECL(size_t) RTStrNLen(const char *pszString, size_t cchMax);
1965
1966/**
1967 * Find the length of a zero-terminated byte string, given
1968 * a max string length.
1969 *
1970 * See also RTStrNLen.
1971 *
1972 * @returns IPRT status code.
1973 * @retval VINF_SUCCESS if the string has a length less than cchMax.
1974 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found
1975 * before cchMax was reached.
1976 *
1977 * @param pszString The string.
1978 * @param cchMax The max string length.
1979 * @param pcch Where to store the string length excluding the
1980 * terminator. This is set to cchMax if the terminator
1981 * isn't found.
1982 */
1983RTDECL(int) RTStrNLenEx(const char *pszString, size_t cchMax, size_t *pcch);
1984
1985RT_C_DECLS_END
1986
1987/** The maximum size argument of a memchr call. */
1988#define RTSTR_MEMCHR_MAX (~(size_t)0x10000)
1989
1990/**
1991 * Find the zero terminator in a string with a limited length.
1992 *
1993 * @returns Pointer to the zero terminator.
1994 * @returns NULL if the zero terminator was not found.
1995 *
1996 * @param pszString The string.
1997 * @param cchMax The max string length. RTSTR_MAX is fine.
1998 */
1999#if defined(__cplusplus) && !defined(DOXYGEN_RUNNING)
2000DECLINLINE(char const *) RTStrEnd(char const *pszString, size_t cchMax)
2001{
2002 /* Avoid potential issues with memchr seen in glibc. */
2003 if (cchMax > RTSTR_MEMCHR_MAX)
2004 {
2005 char const *pszRet = (char const *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX);
2006 if (RT_LIKELY(pszRet))
2007 return pszRet;
2008 pszString += RTSTR_MEMCHR_MAX;
2009 cchMax -= RTSTR_MEMCHR_MAX;
2010 }
2011 return (char const *)memchr(pszString, '\0', cchMax);
2012}
2013
2014DECLINLINE(char *) RTStrEnd(char *pszString, size_t cchMax)
2015#else
2016DECLINLINE(char *) RTStrEnd(const char *pszString, size_t cchMax)
2017#endif
2018{
2019 /* Avoid potential issues with memchr seen in glibc. */
2020 if (cchMax > RTSTR_MEMCHR_MAX)
2021 {
2022 char *pszRet = (char *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX);
2023 if (RT_LIKELY(pszRet))
2024 return pszRet;
2025 pszString += RTSTR_MEMCHR_MAX;
2026 cchMax -= RTSTR_MEMCHR_MAX;
2027 }
2028 return (char *)memchr(pszString, '\0', cchMax);
2029}
2030
2031RT_C_DECLS_BEGIN
2032
2033/**
2034 * Matches a simple string pattern.
2035 *
2036 * @returns true if the string matches the pattern, otherwise false.
2037 *
2038 * @param pszPattern The pattern. Special chars are '*' and '?', where the
2039 * asterisk matches zero or more characters and question
2040 * mark matches exactly one character.
2041 * @param pszString The string to match against the pattern.
2042 */
2043RTDECL(bool) RTStrSimplePatternMatch(const char *pszPattern, const char *pszString);
2044
2045/**
2046 * Matches a simple string pattern, neither of which needs to be zero terminated.
2047 *
2048 * This is identical to RTStrSimplePatternMatch except that you can optionally
2049 * specify the length of both the pattern and the string. The function will
2050 * stop when it hits a string terminator or either of the lengths.
2051 *
2052 * @returns true if the string matches the pattern, otherwise false.
2053 *
2054 * @param pszPattern The pattern. Special chars are '*' and '?', where the
2055 * asterisk matches zero or more characters and question
2056 * mark matches exactly one character.
2057 * @param cchPattern The pattern length. Pass RTSTR_MAX if you don't know the
2058 * length and wish to stop at the string terminator.
2059 * @param pszString The string to match against the pattern.
2060 * @param cchString The string length. Pass RTSTR_MAX if you don't know the
2061 * length and wish to match up to the string terminator.
2062 */
2063RTDECL(bool) RTStrSimplePatternNMatch(const char *pszPattern, size_t cchPattern,
2064 const char *pszString, size_t cchString);
2065
2066/**
2067 * Matches multiple patterns against a string.
2068 *
2069 * The patterns are separated by the pipe character (|).
2070 *
2071 * @returns true if the string matches the pattern, otherwise false.
2072 *
2073 * @param pszPatterns The patterns.
2074 * @param cchPatterns The lengths of the patterns to use. Pass RTSTR_MAX to
2075 * stop at the terminator.
2076 * @param pszString The string to match against the pattern.
2077 * @param cchString The string length. Pass RTSTR_MAX to stop at the
2078 * terminator.
2079 * @param poffPattern Offset into the patterns string of the pattern that
2080 * matched. If no match, this will be set to RTSTR_MAX.
2081 * This is optional, NULL is fine.
2082 */
2083RTDECL(bool) RTStrSimplePatternMultiMatch(const char *pszPatterns, size_t cchPatterns,
2084 const char *pszString, size_t cchString,
2085 size_t *poffPattern);
2086
2087/**
2088 * Compares two version strings RTStrICmp fashion.
2089 *
2090 * The version string is split up into sections at punctuation, spaces,
2091 * underscores, dashes and plus signs. The sections are then split up into
2092 * numeric and string sub-sections. Finally, the sub-sections are compared
2093 * in a numeric or case insensitive fashion depending on what they are.
2094 *
2095 * The following strings are considered to be equal: "1.0.0", "1.00.0", "1.0",
2096 * "1". These aren't: "1.0.0r993", "1.0", "1.0r993", "1.0_Beta3", "1.1"
2097 *
2098 * @returns < 0 if the first string less than the second string.
2099 * @returns 0 if the first string identical to the second string.
2100 * @returns > 0 if the first string greater than the second string.
2101 *
2102 * @param pszVer1 First version string to compare.
2103 * @param pszVer2 Second version string to compare first version with.
2104 */
2105RTDECL(int) RTStrVersionCompare(const char *pszVer1, const char *pszVer2);
2106
2107
2108/** @defgroup rt_str_conv String To/From Number Conversions
2109 * @ingroup grp_rt_str
2110 * @{ */
2111
2112/**
2113 * Converts a string representation of a number to a 64-bit unsigned number.
2114 *
2115 * @returns iprt status code.
2116 * Warnings are used to indicate conversion problems.
2117 * @retval VWRN_NUMBER_TOO_BIG
2118 * @retval VWRN_NEGATIVE_UNSIGNED
2119 * @retval VWRN_TRAILING_CHARS
2120 * @retval VWRN_TRAILING_SPACES
2121 * @retval VINF_SUCCESS
2122 * @retval VERR_NO_DIGITS
2123 *
2124 * @param pszValue Pointer to the string value.
2125 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2126 * @param uBase The base of the representation used.
2127 * If 0 the function will look for known prefixes before defaulting to 10.
2128 * @param pu64 Where to store the converted number. (optional)
2129 */
2130RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
2131
2132/**
2133 * Converts a string representation of a number to a 64-bit unsigned number,
2134 * making sure the full string is converted.
2135 *
2136 * @returns iprt status code.
2137 * Warnings are used to indicate conversion problems.
2138 * @retval VWRN_NUMBER_TOO_BIG
2139 * @retval VWRN_NEGATIVE_UNSIGNED
2140 * @retval VINF_SUCCESS
2141 * @retval VERR_NO_DIGITS
2142 * @retval VERR_TRAILING_SPACES
2143 * @retval VERR_TRAILING_CHARS
2144 *
2145 * @param pszValue Pointer to the string value.
2146 * @param uBase The base of the representation used.
2147 * If 0 the function will look for known prefixes before defaulting to 10.
2148 * @param pu64 Where to store the converted number. (optional)
2149 */
2150RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64);
2151
2152/**
2153 * Converts a string representation of a number to a 64-bit unsigned number.
2154 * The base is guessed.
2155 *
2156 * @returns 64-bit unsigned number on success.
2157 * @returns 0 on failure.
2158 * @param pszValue Pointer to the string value.
2159 */
2160RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
2161
2162/**
2163 * Converts a string representation of a number to a 32-bit unsigned number.
2164 *
2165 * @returns iprt status code.
2166 * Warnings are used to indicate conversion problems.
2167 * @retval VWRN_NUMBER_TOO_BIG
2168 * @retval VWRN_NEGATIVE_UNSIGNED
2169 * @retval VWRN_TRAILING_CHARS
2170 * @retval VWRN_TRAILING_SPACES
2171 * @retval VINF_SUCCESS
2172 * @retval VERR_NO_DIGITS
2173 *
2174 * @param pszValue Pointer to the string value.
2175 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2176 * @param uBase The base of the representation used.
2177 * If 0 the function will look for known prefixes before defaulting to 10.
2178 * @param pu32 Where to store the converted number. (optional)
2179 */
2180RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
2181
2182/**
2183 * Converts a string representation of a number to a 32-bit unsigned number,
2184 * making sure the full string is converted.
2185 *
2186 * @returns iprt status code.
2187 * Warnings are used to indicate conversion problems.
2188 * @retval VWRN_NUMBER_TOO_BIG
2189 * @retval VWRN_NEGATIVE_UNSIGNED
2190 * @retval VINF_SUCCESS
2191 * @retval VERR_NO_DIGITS
2192 * @retval VERR_TRAILING_SPACES
2193 * @retval VERR_TRAILING_CHARS
2194 *
2195 * @param pszValue Pointer to the string value.
2196 * @param uBase The base of the representation used.
2197 * If 0 the function will look for known prefixes before defaulting to 10.
2198 * @param pu32 Where to store the converted number. (optional)
2199 */
2200RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32);
2201
2202/**
2203 * Converts a string representation of a number to a 32-bit unsigned number.
2204 * The base is guessed.
2205 *
2206 * @returns 32-bit unsigned number on success.
2207 * @returns 0 on failure.
2208 * @param pszValue Pointer to the string value.
2209 */
2210RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
2211
2212/**
2213 * Converts a string representation of a number to a 16-bit unsigned number.
2214 *
2215 * @returns iprt status code.
2216 * Warnings are used to indicate conversion problems.
2217 * @retval VWRN_NUMBER_TOO_BIG
2218 * @retval VWRN_NEGATIVE_UNSIGNED
2219 * @retval VWRN_TRAILING_CHARS
2220 * @retval VWRN_TRAILING_SPACES
2221 * @retval VINF_SUCCESS
2222 * @retval VERR_NO_DIGITS
2223 *
2224 * @param pszValue Pointer to the string value.
2225 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2226 * @param uBase The base of the representation used.
2227 * If 0 the function will look for known prefixes before defaulting to 10.
2228 * @param pu16 Where to store the converted number. (optional)
2229 */
2230RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
2231
2232/**
2233 * Converts a string representation of a number to a 16-bit unsigned number,
2234 * making sure the full string is converted.
2235 *
2236 * @returns iprt status code.
2237 * Warnings are used to indicate conversion problems.
2238 * @retval VWRN_NUMBER_TOO_BIG
2239 * @retval VWRN_NEGATIVE_UNSIGNED
2240 * @retval VINF_SUCCESS
2241 * @retval VERR_NO_DIGITS
2242 * @retval VERR_TRAILING_SPACES
2243 * @retval VERR_TRAILING_CHARS
2244 *
2245 * @param pszValue Pointer to the string value.
2246 * @param uBase The base of the representation used.
2247 * If 0 the function will look for known prefixes before defaulting to 10.
2248 * @param pu16 Where to store the converted number. (optional)
2249 */
2250RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16);
2251
2252/**
2253 * Converts a string representation of a number to a 16-bit unsigned number.
2254 * The base is guessed.
2255 *
2256 * @returns 16-bit unsigned number on success.
2257 * @returns 0 on failure.
2258 * @param pszValue Pointer to the string value.
2259 */
2260RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
2261
2262/**
2263 * Converts a string representation of a number to an 8-bit unsigned number.
2264 *
2265 * @returns iprt status code.
2266 * Warnings are used to indicate conversion problems.
2267 * @retval VWRN_NUMBER_TOO_BIG
2268 * @retval VWRN_NEGATIVE_UNSIGNED
2269 * @retval VWRN_TRAILING_CHARS
2270 * @retval VWRN_TRAILING_SPACES
2271 * @retval VINF_SUCCESS
2272 * @retval VERR_NO_DIGITS
2273 *
2274 * @param pszValue Pointer to the string value.
2275 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2276 * @param uBase The base of the representation used.
2277 * If 0 the function will look for known prefixes before defaulting to 10.
2278 * @param pu8 Where to store the converted number. (optional)
2279 */
2280RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
2281
2282/**
2283 * Converts a string representation of a number to an 8-bit unsigned number,
2284 * making sure the full string is converted.
2285 *
2286 * @returns iprt status code.
2287 * Warnings are used to indicate conversion problems.
2288 * @retval VWRN_NUMBER_TOO_BIG
2289 * @retval VWRN_NEGATIVE_UNSIGNED
2290 * @retval VINF_SUCCESS
2291 * @retval VERR_NO_DIGITS
2292 * @retval VERR_TRAILING_SPACES
2293 * @retval VERR_TRAILING_CHARS
2294 *
2295 * @param pszValue Pointer to the string value.
2296 * @param uBase The base of the representation used.
2297 * If 0 the function will look for known prefixes before defaulting to 10.
2298 * @param pu8 Where to store the converted number. (optional)
2299 */
2300RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8);
2301
2302/**
2303 * Converts a string representation of a number to an 8-bit unsigned number.
2304 * The base is guessed.
2305 *
2306 * @returns 8-bit unsigned number on success.
2307 * @returns 0 on failure.
2308 * @param pszValue Pointer to the string value.
2309 */
2310RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
2311
2312/**
2313 * Converts a string representation of a number to a 64-bit signed number.
2314 *
2315 * @returns iprt status code.
2316 * Warnings are used to indicate conversion problems.
2317 * @retval VWRN_NUMBER_TOO_BIG
2318 * @retval VWRN_TRAILING_CHARS
2319 * @retval VWRN_TRAILING_SPACES
2320 * @retval VINF_SUCCESS
2321 * @retval VERR_NO_DIGITS
2322 *
2323 * @param pszValue Pointer to the string value.
2324 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2325 * @param uBase The base of the representation used.
2326 * If 0 the function will look for known prefixes before defaulting to 10.
2327 * @param pi64 Where to store the converted number. (optional)
2328 */
2329RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
2330
2331/**
2332 * Converts a string representation of a number to a 64-bit signed number,
2333 * making sure the full string is converted.
2334 *
2335 * @returns iprt status code.
2336 * Warnings are used to indicate conversion problems.
2337 * @retval VWRN_NUMBER_TOO_BIG
2338 * @retval VINF_SUCCESS
2339 * @retval VERR_TRAILING_CHARS
2340 * @retval VERR_TRAILING_SPACES
2341 * @retval VERR_NO_DIGITS
2342 *
2343 * @param pszValue Pointer to the string value.
2344 * @param uBase The base of the representation used.
2345 * If 0 the function will look for known prefixes before defaulting to 10.
2346 * @param pi64 Where to store the converted number. (optional)
2347 */
2348RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64);
2349
2350/**
2351 * Converts a string representation of a number to a 64-bit signed number.
2352 * The base is guessed.
2353 *
2354 * @returns 64-bit signed number on success.
2355 * @returns 0 on failure.
2356 * @param pszValue Pointer to the string value.
2357 */
2358RTDECL(int64_t) RTStrToInt64(const char *pszValue);
2359
2360/**
2361 * Converts a string representation of a number to a 32-bit signed number.
2362 *
2363 * @returns iprt status code.
2364 * Warnings are used to indicate conversion problems.
2365 * @retval VWRN_NUMBER_TOO_BIG
2366 * @retval VWRN_TRAILING_CHARS
2367 * @retval VWRN_TRAILING_SPACES
2368 * @retval VINF_SUCCESS
2369 * @retval VERR_NO_DIGITS
2370 *
2371 * @param pszValue Pointer to the string value.
2372 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2373 * @param uBase The base of the representation used.
2374 * If 0 the function will look for known prefixes before defaulting to 10.
2375 * @param pi32 Where to store the converted number. (optional)
2376 */
2377RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
2378
2379/**
2380 * Converts a string representation of a number to a 32-bit signed number,
2381 * making sure the full string is converted.
2382 *
2383 * @returns iprt status code.
2384 * Warnings are used to indicate conversion problems.
2385 * @retval VWRN_NUMBER_TOO_BIG
2386 * @retval VINF_SUCCESS
2387 * @retval VERR_TRAILING_CHARS
2388 * @retval VERR_TRAILING_SPACES
2389 * @retval VERR_NO_DIGITS
2390 *
2391 * @param pszValue Pointer to the string value.
2392 * @param uBase The base of the representation used.
2393 * If 0 the function will look for known prefixes before defaulting to 10.
2394 * @param pi32 Where to store the converted number. (optional)
2395 */
2396RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32);
2397
2398/**
2399 * Converts a string representation of a number to a 32-bit signed number.
2400 * The base is guessed.
2401 *
2402 * @returns 32-bit signed number on success.
2403 * @returns 0 on failure.
2404 * @param pszValue Pointer to the string value.
2405 */
2406RTDECL(int32_t) RTStrToInt32(const char *pszValue);
2407
2408/**
2409 * Converts a string representation of a number to a 16-bit signed number.
2410 *
2411 * @returns iprt status code.
2412 * Warnings are used to indicate conversion problems.
2413 * @retval VWRN_NUMBER_TOO_BIG
2414 * @retval VWRN_TRAILING_CHARS
2415 * @retval VWRN_TRAILING_SPACES
2416 * @retval VINF_SUCCESS
2417 * @retval VERR_NO_DIGITS
2418 *
2419 * @param pszValue Pointer to the string value.
2420 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2421 * @param uBase The base of the representation used.
2422 * If 0 the function will look for known prefixes before defaulting to 10.
2423 * @param pi16 Where to store the converted number. (optional)
2424 */
2425RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
2426
2427/**
2428 * Converts a string representation of a number to a 16-bit signed number,
2429 * making sure the full string is converted.
2430 *
2431 * @returns iprt status code.
2432 * Warnings are used to indicate conversion problems.
2433 * @retval VWRN_NUMBER_TOO_BIG
2434 * @retval VINF_SUCCESS
2435 * @retval VERR_TRAILING_CHARS
2436 * @retval VERR_TRAILING_SPACES
2437 * @retval VERR_NO_DIGITS
2438 *
2439 * @param pszValue Pointer to the string value.
2440 * @param uBase The base of the representation used.
2441 * If 0 the function will look for known prefixes before defaulting to 10.
2442 * @param pi16 Where to store the converted number. (optional)
2443 */
2444RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16);
2445
2446/**
2447 * Converts a string representation of a number to a 16-bit signed number.
2448 * The base is guessed.
2449 *
2450 * @returns 16-bit signed number on success.
2451 * @returns 0 on failure.
2452 * @param pszValue Pointer to the string value.
2453 */
2454RTDECL(int16_t) RTStrToInt16(const char *pszValue);
2455
2456/**
2457 * Converts a string representation of a number to an 8-bit signed number.
2458 *
2459 * @returns iprt status code.
2460 * Warnings are used to indicate conversion problems.
2461 * @retval VWRN_NUMBER_TOO_BIG
2462 * @retval VWRN_TRAILING_CHARS
2463 * @retval VWRN_TRAILING_SPACES
2464 * @retval VINF_SUCCESS
2465 * @retval VERR_NO_DIGITS
2466 *
2467 * @param pszValue Pointer to the string value.
2468 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
2469 * @param uBase The base of the representation used.
2470 * If 0 the function will look for known prefixes before defaulting to 10.
2471 * @param pi8 Where to store the converted number. (optional)
2472 */
2473RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
2474
2475/**
2476 * Converts a string representation of a number to an 8-bit signed number,
2477 * making sure the full string is converted.
2478 *
2479 * @returns iprt status code.
2480 * Warnings are used to indicate conversion problems.
2481 * @retval VWRN_NUMBER_TOO_BIG
2482 * @retval VINF_SUCCESS
2483 * @retval VERR_TRAILING_CHARS
2484 * @retval VERR_TRAILING_SPACES
2485 * @retval VERR_NO_DIGITS
2486 *
2487 * @param pszValue Pointer to the string value.
2488 * @param uBase The base of the representation used.
2489 * If 0 the function will look for known prefixes before defaulting to 10.
2490 * @param pi8 Where to store the converted number. (optional)
2491 */
2492RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8);
2493
2494/**
2495 * Converts a string representation of a number to an 8-bit signed number.
2496 * The base is guessed.
2497 *
2498 * @returns 8-bit signed number on success.
2499 * @returns 0 on failure.
2500 * @param pszValue Pointer to the string value.
2501 */
2502RTDECL(int8_t) RTStrToInt8(const char *pszValue);
2503
2504/**
2505 * Formats a buffer stream as hex bytes.
2506 *
2507 * The default is no separating spaces or line breaks or anything.
2508 *
2509 * @returns IPRT status code.
2510 * @retval VERR_INVALID_POINTER if any of the pointers are wrong.
2511 * @retval VERR_BUFFER_OVERFLOW if the buffer is insufficient to hold the bytes.
2512 *
2513 * @param pszBuf Output string buffer.
2514 * @param cchBuf The size of the output buffer.
2515 * @param pv Pointer to the bytes to stringify.
2516 * @param cb The number of bytes to stringify.
2517 * @param fFlags Must be zero, reserved for future use.
2518 */
2519RTDECL(int) RTStrPrintHexBytes(char *pszBuf, size_t cchBuf, void const *pv, size_t cb, uint32_t fFlags);
2520
2521/**
2522 * Converts a string of hex bytes back into binary data.
2523 *
2524 * @returns IPRT status code.
2525 * @retval VERR_INVALID_POINTER if any of the pointers are wrong.
2526 * @retval VERR_BUFFER_OVERFLOW if the string contains too many hex bytes.
2527 * @retval VERR_BUFFER_UNDERFLOW if there aren't enough hex bytes to fill up
2528 * the output buffer.
2529 * @retval VERR_UNEVEN_INPUT if the input contains a half byte.
2530 * @retval VERR_NO_DIGITS
2531 * @retval VWRN_TRAILING_CHARS
2532 * @retval VWRN_TRAILING_SPACES
2533 *
2534 * @param pszHex The string containing the hex bytes.
2535 * @param pv Output buffer.
2536 * @param cb The size of the output buffer.
2537 * @param fFlags Must be zero, reserved for future use.
2538 */
2539RTDECL(int) RTStrConvertHexBytes(char const *pszHex, void *pv, size_t cb, uint32_t fFlags);
2540
2541/** @} */
2542
2543
2544/** @defgroup rt_str_space Unique String Space
2545 * @ingroup grp_rt_str
2546 * @{
2547 */
2548
2549/** Pointer to a string name space container node core. */
2550typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
2551/** Pointer to a pointer to a string name space container node core. */
2552typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
2553
2554/**
2555 * String name space container node core.
 *
 * This is the node type taken by RTStrSpaceInsert() and returned by
 * RTStrSpaceRemove() and RTStrSpaceGet().  Members marked "Don't touch" are
 * presumably maintained by the string space code itself (TODO confirm against
 * the implementation); the string members are marked read only.
2556 */
2557typedef struct RTSTRSPACECORE
2558{
2559 /** Hash key. Don't touch. */
2560 uint32_t Key;
2561 /** Pointer to the left leaf node. Don't touch. */
2562 PRTSTRSPACECORE pLeft;
2563 /** Pointer to the right leaf node. Don't touch. */
2564 PRTSTRSPACECORE pRight;
2565 /** Pointer to the list of strings with the same key. Don't touch. */
2566 PRTSTRSPACECORE pList;
2567 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
2568 unsigned char uchHeight;
2569 /** The string length. Read only! */
2570 size_t cchString;
2571 /** Pointer to the string. Read only! */
2572 const char *pszString;
2573} RTSTRSPACECORE;
2574
2575/** String space. (Initialize with NULL.) */
2576typedef PRTSTRSPACECORE RTSTRSPACE;
2577/** Pointer to a string space. */
2578typedef PPRTSTRSPACECORE PRTSTRSPACE;
2579
2580
2581/**
2582 * Inserts a string into a unique string space.
2583 *
2584 * @returns true on success.
2585 * @returns false if the string collided with an existing string.
2586 * @param pStrSpace The space to insert it into.
2587 * @param pStr The string node.
2588 */
2589RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
2590
2591/**
2592 * Removes a string from a unique string space.
2593 *
2594 * @returns Pointer to the removed string node.
2595 * @returns NULL if the string was not found in the string space.
2596 * @param pStrSpace The space to remove it from.
2597 * @param pszString The string to remove.
2598 */
2599RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
2600
2601/**
2602 * Gets a string from a unique string space.
2603 *
2604 * @returns Pointer to the string node.
2605 * @returns NULL if the string was not found in the string space.
2606 * @param pStrSpace The space to search.
2607 * @param pszString The string to get.
2608 */
2609RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
2610
2611/**
2612 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
2613 *
2614 * @returns 0 on continue.
2615 * @returns Non-zero to abort the operation.
2616 * @param pStr The string node
2617 * @param pvUser The user specified argument.
2618 */
2619typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
2620/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
2621typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
2622
2623/**
2624 * Destroys the string space.
2625 * The caller supplies a callback which will be called for each of
2626 * the string nodes, for freeing their memory and other resources.
2627 *
2628 * @returns 0 or what ever non-zero return value pfnCallback returned
2629 * when aborting the destruction.
2630 * @param pStrSpace The space to destroy.
2631 * @param pfnCallback The callback.
2632 * @param pvUser The user argument.
2633 */
2634RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
2635
2636/**
2637 * Enumerates the string space.
2638 * The caller supplies a callback which will be called for each of
2639 * the string nodes.
2640 *
2641 * @returns 0 or whatever non-zero return value pfnCallback returned
2642 * when aborting the enumeration.
2643 * @param pStrSpace The space to enumerate.
2644 * @param pfnCallback The callback.
2645 * @param pvUser The user argument.
2646 */
2647RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
2648
2649/** @} */
2650
2651
2652/** @defgroup rt_str_utf16 UTF-16 String Manipulation
2653 * @ingroup grp_rt_str
2654 * @{
2655 */
2656
2657/**
2658 * Free a UTF-16 string allocated by RTStrToUtf16(), RTStrToUtf16Ex(),
2659 * RTLatin1ToUtf16(), RTLatin1ToUtf16Ex(), RTUtf16Dup() or RTUtf16DupEx().
2660 *
2662 * @param pwszString The UTF-16 string to free. NULL is accepted.
2663 */
2664RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
2665
2666/**
2667 * Allocates a new copy of the specified UTF-16 string (default tag).
2668 *
2669 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
2670 * @returns NULL when out of memory.
2671 * @param pwszString UTF-16 string to duplicate.
2672 * @remark This function will not make any attempt to validate the encoding.
2673 */
2674#define RTUtf16Dup(pwszString) RTUtf16DupTag((pwszString), RTSTR_TAG)
2675
2676/**
2677 * Allocates a new copy of the specified UTF-16 string (custom tag).
2678 *
2679 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
2680 * @returns NULL when out of memory.
2681 * @param pwszString UTF-16 string to duplicate.
2682 * @param pszTag Allocation tag used for statistics and such.
2683 * @remark This function will not make any attempt to validate the encoding.
2684 */
2685RTDECL(PRTUTF16) RTUtf16DupTag(PCRTUTF16 pwszString, const char *pszTag);
2686
2687/**
2688 * Allocates a new copy of the specified UTF-16 string (default tag).
2689 *
2690 * @returns iprt status code.
2691 * @param ppwszString Receives pointer of the allocated UTF-16 string.
2692 * The returned pointer must be freed using RTUtf16Free().
2693 * @param pwszString UTF-16 string to duplicate.
2694 * @param cwcExtra Number of extra RTUTF16 items to allocate.
2695 * @remark This function will not make any attempt to validate the encoding.
2696 */
2697#define RTUtf16DupEx(ppwszString, pwszString, cwcExtra) \
2698 RTUtf16DupExTag((ppwszString), (pwszString), (cwcExtra), RTSTR_TAG)
2699
2700/**
2701 * Allocates a new copy of the specified UTF-16 string (custom tag).
2702 *
2703 * @returns iprt status code.
2704 * @param ppwszString Receives pointer of the allocated UTF-16 string.
2705 * The returned pointer must be freed using RTUtf16Free().
2706 * @param pwszString UTF-16 string to duplicate.
2707 * @param cwcExtra Number of extra RTUTF16 items to allocate.
2708 * @param pszTag Allocation tag used for statistics and such.
2709 * @remark This function will not make any attempt to validate the encoding.
2710 */
2711RTDECL(int) RTUtf16DupExTag(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra, const char *pszTag);
2712
2713/**
2714 * Returns the length of a UTF-16 string in UTF-16 characters
2715 * without trailing '\\0'.
2716 *
2717 * Surrogate pairs count as two UTF-16 characters here. Use RTUtf16CpCnt()
2718 * to get the exact number of code points in the string.
2719 *
2720 * @returns The number of RTUTF16 items in the string.
2721 * @param pwszString Pointer to the UTF-16 string.
2722 * @remark This function will not make any attempt to validate the encoding.
2723 */
2724RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
2725
2726/**
2727 * Performs a case sensitive string compare between two UTF-16 strings.
2728 *
2729 * @returns < 0 if the first string less than the second string.
2730 * @returns 0 if the first string identical to the second string.
2731 * @returns > 0 if the first string greater than the second string.
2732 * @param pwsz1 First UTF-16 string. Null is allowed.
2733 * @param pwsz2 Second UTF-16 string. Null is allowed.
2734 * @remark This function will not make any attempt to validate the encoding.
2735 */
2736RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
2737
2738/**
2739 * Performs a case insensitive string compare between two UTF-16 strings.
2740 *
2741 * This is a simplified compare, as only the simplified lower/upper case folding
2742 * specified by the unicode specs are used. It does not consider character pairs
2743 * as they are used in some languages, just simple upper & lower case compares.
2744 *
2745 * @returns < 0 if the first string less than the second string.
2746 * @returns 0 if the first string identical to the second string.
2747 * @returns > 0 if the first string greater than the second string.
2748 * @param pwsz1 First UTF-16 string. Null is allowed.
2749 * @param pwsz2 Second UTF-16 string. Null is allowed.
2750 */
2751RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
2752
2753/**
2754 * Performs a case insensitive string compare between two UTF-16 strings
2755 * using the current locale of the process (if applicable).
2756 *
2757 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
2758 * required data is available, to do a correct case-insensitive compare. It
2759 * follows that it is more complex and thereby likely to be more expensive.
2760 *
2761 * @returns < 0 if the first string less than the second string.
2762 * @returns 0 if the first string identical to the second string.
2763 * @returns > 0 if the first string greater than the second string.
2764 * @param pwsz1 First UTF-16 string. Null is allowed.
2765 * @param pwsz2 Second UTF-16 string. Null is allowed.
2766 */
2767RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
2768
2769/**
2770 * Folds a UTF-16 string to lowercase.
2771 *
2772 * This is a very simple folding; it uses the simple lowercase
2773 * code point, it is not related to any locale just the most common
2774 * lowercase codepoint setup by the unicode specs, and it will not
2775 * create new surrogate pairs or remove existing ones.
2776 *
2777 * @returns Pointer to the passed in string.
2778 * @param pwsz The string to fold.
2779 */
2780RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
2781
2782/**
2783 * Folds a UTF-16 string to uppercase.
2784 *
2785 * This is a very simple folding; it uses the simple uppercase
2786 * code point, it is not related to any locale just the most common
2787 * uppercase codepoint setup by the unicode specs, and it will not
2788 * create new surrogate pairs or remove existing ones.
2789 *
2790 * @returns Pointer to the passed in string.
2791 * @param pwsz The string to fold.
2792 */
2793RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
2794
2795/**
2796 * Translate a UTF-16 string into a UTF-8 allocating the result buffer (default
2797 * tag).
2798 *
2799 * @returns iprt status code.
2800 * @param pwszString UTF-16 string to convert.
2801 * @param ppszString Receives pointer of allocated UTF-8 string on
2802 * success, and is always set to NULL on failure.
2803 * The returned pointer must be freed using RTStrFree().
2804 */
2805#define RTUtf16ToUtf8(pwszString, ppszString) RTUtf16ToUtf8Tag((pwszString), (ppszString), RTSTR_TAG)
2806
2807/**
2808 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
2809 *
2810 * @returns iprt status code.
2811 * @param pwszString UTF-16 string to convert.
2812 * @param ppszString Receives pointer of allocated UTF-8 string on
2813 * success, and is always set to NULL on failure.
2814 * The returned pointer must be freed using RTStrFree().
2815 * @param pszTag Allocation tag used for statistics and such.
2816 */
2817RTDECL(int) RTUtf16ToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag);
2818
2819/**
2820 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly
2821 * sized buffer allocated by the function (default tag).
2822 *
2823 * @returns iprt status code.
2824 * @param pwszString The UTF-16 string to convert.
2825 * @param cwcString The number of RTUTF16 items to translate from pwszString.
2826 * The translation will stop when reaching cwcString or the terminator ('\\0').
2827 * Use RTSTR_MAX to translate the entire string.
2828 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to
2829 * a buffer of the specified size, or pointer to a NULL pointer.
2830 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
2831 * will be allocated to hold the translated string.
2832 * If a buffer was requested it must be freed using RTStrFree().
2833 * @param cch The buffer size in chars (the type). This includes the terminator.
2834 * @param pcch Where to store the length of the translated string,
2835 * excluding the terminator. (Optional)
2836 *
2837 * This may be set under some error conditions,
2838 * however, only for VERR_BUFFER_OVERFLOW and
2839 * VERR_NO_STR_MEMORY will it contain a valid string
2840 * length that can be used to resize the buffer.
2841 */
2842#define RTUtf16ToUtf8Ex(pwszString, cwcString, ppsz, cch, pcch) \
2843 RTUtf16ToUtf8ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG)
2844
2845/**
2846 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly
2847 * sized buffer allocated by the function (custom tag).
2848 *
2849 * @returns iprt status code.
2850 * @param pwszString The UTF-16 string to convert.
2851 * @param cwcString The number of RTUTF16 items to translate from pwszString.
2852 * The translation will stop when reaching cwcString or the terminator ('\\0').
2853 * Use RTSTR_MAX to translate the entire string.
2854 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to
2855 * a buffer of the specified size, or pointer to a NULL pointer.
2856 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
2857 * will be allocated to hold the translated string.
2858 * If a buffer was requested it must be freed using RTStrFree().
2859 * @param cch The buffer size in chars (the type). This includes the terminator.
2860 * @param pcch Where to store the length of the translated string,
2861 * excluding the terminator. (Optional)
2862 *
2863 * This may be set under some error conditions,
2864 * however, only for VERR_BUFFER_OVERFLOW and
2865 * VERR_NO_STR_MEMORY will it contain a valid string
2866 * length that can be used to resize the buffer.
2867 * @param pszTag Allocation tag used for statistics and such.
2868 */
2869RTDECL(int) RTUtf16ToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);
2870
2871/**
2872 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
2873 *
2874 * This function will validate the string, and incorrectly encoded UTF-16
2875 * strings will be rejected. The primary purpose of this function is to
2876 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most
2877 * other purposes RTUtf16ToUtf8Ex() should be used.
2878 *
2879 * @returns Number of char (bytes).
2880 * @returns 0 if the string was incorrectly encoded.
2881 * @param pwsz The UTF-16 string.
2882 */
2883RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz);
2884
2885/**
2886 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
2887 *
2888 * This function will validate the string, and incorrectly encoded UTF-16
2889 * strings will be rejected.
2890 *
2891 * @returns iprt status code.
2892 * @param pwsz The string.
2893 * @param cwc The max string length. Use RTSTR_MAX to process the entire string.
2894 * @param pcch Where to store the string length (in bytes). Optional.
2895 * This is undefined on failure.
2896 */
2897RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
2898
2899/**
2900 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result
2901 * buffer (default tag).
2902 *
2903 * @returns iprt status code.
2904 * @param pwszString UTF-16 string to convert.
2905 * @param ppszString Receives pointer of allocated Latin1 string on
2906 * success, and is always set to NULL on failure.
2907 * The returned pointer must be freed using RTStrFree().
2908 */
2909#define RTUtf16ToLatin1(pwszString, ppszString) RTUtf16ToLatin1Tag((pwszString), (ppszString), RTSTR_TAG)
2910
2911/**
2912 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result
2913 * buffer (custom tag).
2914 *
2915 * @returns iprt status code.
2916 * @param pwszString UTF-16 string to convert.
2917 * @param ppszString Receives pointer of allocated Latin1 string on
2918 * success, and is always set to NULL on failure.
2919 * The returned pointer must be freed using RTStrFree().
2920 * @param pszTag Allocation tag used for statistics and such.
2921 */
2922RTDECL(int) RTUtf16ToLatin1Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag);
2923
2924/**
2925 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller
2926 * or a fittingly sized buffer allocated by the function (default tag).
2927 *
2928 * @returns iprt status code.
2929 * @param pwszString The UTF-16 string to convert.
2930 * @param cwcString The number of RTUTF16 items to translate from
2931 * pwszString. The translation will stop when reaching
2932 * cwcString or the terminator ('\\0'). Use RTSTR_MAX
2933 * to translate the entire string.
2934 * @param ppsz Pointer to the pointer to the Latin-1 string. The
2935 * buffer can optionally be preallocated by the caller.
2936 *
2937 * If cch is zero, *ppsz is undefined.
2938 *
2939 * If cch is non-zero and *ppsz is not NULL, then this
2940 * will be used as the output buffer.
2941 * VERR_BUFFER_OVERFLOW will be returned if this is
2942 * insufficient.
2943 *
2944 * If cch is zero or *ppsz is NULL, then a buffer of
2945 * sufficient size is allocated. cch can be used to
2946 * specify a minimum size of this buffer. Use
2947 * RTStrFree() to free the result.
2948 *
2949 * @param cch The buffer size in chars (the type). This includes
2950 * the terminator.
2951 * @param pcch Where to store the length of the translated string,
2952 * excluding the terminator. (Optional)
2953 *
2954 * This may be set under some error conditions,
2955 * however, only for VERR_BUFFER_OVERFLOW and
2956 * VERR_NO_STR_MEMORY will it contain a valid string
2957 * length that can be used to resize the buffer.
2958 */
2959#define RTUtf16ToLatin1Ex(pwszString, cwcString, ppsz, cch, pcch) \
2960 RTUtf16ToLatin1ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG)
2961
2962/**
2963 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller
2964 * or a fittingly sized buffer allocated by the function (custom tag).
2965 *
2966 * @returns iprt status code.
2967 * @param pwszString The UTF-16 string to convert.
2968 * @param cwcString The number of RTUTF16 items to translate from
2969 * pwszString. The translation will stop when reaching
2970 * cwcString or the terminator ('\\0'). Use RTSTR_MAX
2971 * to translate the entire string.
2972 * @param ppsz Pointer to the pointer to the Latin-1 string. The
2973 * buffer can optionally be preallocated by the caller.
2974 *
2975 * If cch is zero, *ppsz is undefined.
2976 *
2977 * If cch is non-zero and *ppsz is not NULL, then this
2978 * will be used as the output buffer.
2979 * VERR_BUFFER_OVERFLOW will be returned if this is
2980 * insufficient.
2981 *
2982 * If cch is zero or *ppsz is NULL, then a buffer of
2983 * sufficient size is allocated. cch can be used to
2984 * specify a minimum size of this buffer. Use
2985 * RTStrFree() to free the result.
2986 *
2987 * @param cch The buffer size in chars (the type). This includes
2988 * the terminator.
2989 * @param pcch Where to store the length of the translated string,
2990 * excluding the terminator. (Optional)
2991 *
2992 * This may be set under some error conditions,
2993 * however, only for VERR_BUFFER_OVERFLOW and
2994 * VERR_NO_STR_MEMORY will it contain a valid string
2995 * length that can be used to resize the buffer.
2996 * @param pszTag Allocation tag used for statistics and such.
2997 */
2998RTDECL(int) RTUtf16ToLatin1ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);
2999
3000/**
3001 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars.
3002 *
3003 * This function will validate the string, and incorrectly encoded UTF-16
3004 * strings will be rejected. The primary purpose of this function is to
3005 * help allocate buffers for RTUtf16ToLatin1() of the correct size. For most
3006 * other purposes RTUtf16ToLatin1Ex() should be used.
3007 *
3008 * @returns Number of char (bytes).
3009 * @returns 0 if the string was incorrectly encoded.
3010 * @param pwsz The UTF-16 string.
3011 */
3012RTDECL(size_t) RTUtf16CalcLatin1Len(PCRTUTF16 pwsz);
3013
3014/**
3015 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars.
3016 *
3017 * This function will validate the string, and incorrectly encoded UTF-16
3018 * strings will be rejected.
3019 *
3020 * @returns iprt status code.
3021 * @param pwsz The string.
3022 * @param cwc The max string length. Use RTSTR_MAX to process the
3023 * entire string.
3024 * @param pcch Where to store the string length (in bytes). Optional.
3025 * This is undefined on failure.
3026 */
3027RTDECL(int) RTUtf16CalcLatin1LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
3028
3029/**
3030 * Get the unicode code point at the given string position.
3031 *
3032 * @returns unicode code point.
3033 * @returns RTUNICP_INVALID if the encoding is invalid.
3034 * @param pwsz The string.
3035 *
3036 * @remark This is an internal worker for RTUtf16GetCp().
3037 */
3038RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
3039
3040/**
3041 * Get the unicode code point at the given string position.
3042 *
3043 * @returns iprt status code.
3044 * @param ppwsz Pointer to the string pointer. This will be updated to
3045 * point to the char following the current code point.
3046 * @param pCp Where to store the code point.
3047 * RTUNICP_INVALID is stored here on failure.
3048 *
3049 * @remark This is an internal worker for RTUtf16GetCpEx().
3050 */
3051RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
3052
3053/**
3054 * Put the unicode code point at the given string position
3055 * and return the pointer to the char following it.
3056 *
3057 * This function will not consider anything at or following the
3058 * buffer area pointed to by pwsz. It is therefore not suitable for
3059 * inserting code points into a string, only appending/overwriting.
3060 *
3061 * @returns pointer to the char following the written code point.
3062 * @param pwsz The string.
3063 * @param CodePoint The code point to write.
3064 * This should not be RTUNICP_INVALID or any other
3065 * character out of the UTF-16 range.
3066 *
3067 * @remark This is an internal worker for RTUtf16PutCp().
3068 */
3069RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
3070
3071/**
3072 * Get the unicode code point at the given string position.
3073 *
3074 * @returns unicode code point.
3075 * @returns RTUNICP_INVALID if the encoding is invalid.
3076 * @param pwsz The string.
3077 *
3078 * @remark We optimize this operation by using an inline function for
3079 * everything which isn't a surrogate pair or an endian indicator.
3080 */
3081DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
3082{
3083 const RTUTF16 wc = *pwsz;
3084 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
3085 return wc;
3086 return RTUtf16GetCpInternal(pwsz);
3087}
3088
3089/**
3090 * Get the unicode code point at the given string position.
3091 *
3092 * @returns iprt status code.
3093 * @param ppwsz Pointer to the string pointer. This will be updated to
3094 * point to the char following the current code point.
3095 * @param pCp Where to store the code point.
3096 * RTUNICP_INVALID is stored here on failure.
3097 *
3098 * @remark We optimize this operation by using an inline function for
3099 * everything which isn't a surrogate pair or and endian indicator.
3100 */
3101DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
3102{
3103 const RTUTF16 wc = **ppwsz;
3104 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
3105 {
3106 (*ppwsz)++;
3107 *pCp = wc;
3108 return VINF_SUCCESS;
3109 }
3110 return RTUtf16GetCpExInternal(ppwsz, pCp);
3111}
3112
3113/**
3114 * Put the unicode code point at the given string position
3115 * and return the pointer to the char following it.
3116 *
3117 * This function will not consider anything at or following the
3118 * buffer area pointed to by pwsz. It is therefore not suitable for
3119 * inserting code points into a string, only appending/overwriting.
3120 *
3121 * @returns pointer to the char following the written code point.
3122 * @param pwsz The string.
3123 * @param CodePoint The code point to write.
3124 * This should not be RTUNICP_INVALID or any other
3125 * character out of the UTF-16 range.
3126 *
3127 * @remark We optimize this operation by using an inline function for
3128 * everything which isn't a surrogate pair or and endian indicator.
3129 */
3130DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
3131{
3132 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
3133 {
3134 *pwsz++ = (RTUTF16)CodePoint;
3135 return pwsz;
3136 }
3137 return RTUtf16PutCpInternal(pwsz, CodePoint);
3138}
3139
3140/**
3141 * Skips ahead, past the current code point.
3142 *
3143 * @returns Pointer to the char after the current code point.
3144 * @param pwsz Pointer to the current code point.
3145 * @remark This will not move the next valid code point, only past the current one.
3146 */
3147DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
3148{
3149 RTUNICP Cp;
3150 RTUtf16GetCpEx(&pwsz, &Cp);
3151 return (PRTUTF16)pwsz;
3152}
3153
3154/**
3155 * Skips backwards, to the previous code point.
3156 *
3157 * @returns Pointer to the previous code point.
3158 * @param pwszStart Pointer to the start of the string.
3159 * @param pwsz Pointer to the current code point.
3160 */
3161RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
3162
3163
3164/**
3165 * Checks if the UTF-16 char is the high surrogate char (i.e.
3166 * the 1st char in the pair).
3167 *
3168 * @returns true if it is.
3169 * @returns false if it isn't.
3170 * @param wc The character to investigate.
3171 */
3172DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
3173{
3174 return wc >= 0xd800 && wc <= 0xdbff;
3175}
3176
3177/**
3178 * Checks if the UTF-16 char is the low surrogate char (i.e.
3179 * the 2nd char in the pair).
3180 *
3181 * @returns true if it is.
3182 * @returns false if it isn't.
3183 * @param wc The character to investigate.
3184 */
3185DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
3186{
3187 return wc >= 0xdc00 && wc <= 0xdfff;
3188}
3189
3190
3191/**
3192 * Checks if the two UTF-16 chars form a valid surrogate pair.
3193 *
3194 * @returns true if they do.
3195 * @returns false if they doesn't.
3196 * @param wcHigh The high (1st) character.
3197 * @param wcLow The low (2nd) character.
3198 */
3199DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
3200{
3201 return RTUtf16IsHighSurrogate(wcHigh)
3202 && RTUtf16IsLowSurrogate(wcLow);
3203}
3204
3205/** @} */
3206
3207
3208/** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation
3209 * @ingroup grp_rt_str
3210 * @{
3211 */
3212
3213/**
3214 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items.
3215 *
3216 * @returns Number of RTUTF16 items.
3217 * @param psz The Latin-1 string.
3218 */
3219RTDECL(size_t) RTLatin1CalcUtf16Len(const char *psz);
3220
3221/**
3222 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items.
3223 *
3224 * @returns iprt status code.
3225 * @param psz The Latin-1 string.
3226 * @param cch The max string length. Use RTSTR_MAX to process the
3227 * entire string.
3228 * @param pcwc Where to store the string length. Optional.
3229 * This is undefined on failure.
3230 */
3231RTDECL(int) RTLatin1CalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
3232
3233/**
3234 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result
3235 * buffer (default tag).
3236 *
3237 * @returns iprt status code.
3238 * @param pszString The Latin-1 string to convert.
3239 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
3240 * returned string must be freed using RTUtf16Free().
3241 */
3242#define RTLatin1ToUtf16(pszString, ppwszString) RTLatin1ToUtf16Tag((pszString), (ppwszString), RTSTR_TAG)
3243
3244/**
3245 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result
3246 * buffer (custom tag).
3247 *
3248 * @returns iprt status code.
3249 * @param pszString The Latin-1 string to convert.
3250 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
3251 * returned string must be freed using RTUtf16Free().
3252 * @param pszTag Allocation tag used for statistics and such.
3253 */
3254RTDECL(int) RTLatin1ToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag);
3255
3256/**
3257 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the
3258 * result buffer if requested (default tag). Forwards to RTLatin1ToUtf16ExTag() with RTSTR_TAG.
3259 *
3260 * @returns iprt status code.
3261 * @param pszString The Latin-1 string to convert.
3262 * @param cchString The maximum size in chars (the type) to convert.
3263 * The conversion stops when it reaches cchString or
3264 * the string terminator ('\\0').
3265 * Use RTSTR_MAX to translate the entire string.
3266 * @param ppwsz If cwc is non-zero, this must point either to a
3267 * pointer to a buffer of the specified size, or to a
3268 * NULL pointer.
3269 * If *ppwsz is NULL or cwc is zero, a buffer of at
3270 * least cwc items will be allocated to hold the
3271 * translated string. If a buffer was requested, it
3272 * must be freed using RTUtf16Free().
3273 * @param cwc The buffer size in RTUTF16s. This includes the
3274 * terminator.
3275 * @param pcwc Where to store the length of the translated string,
3276 * excluding the terminator. (Optional)
3277 *
3278 * This may be set under some error conditions;
3279 * however, only for VERR_BUFFER_OVERFLOW and
3280 * VERR_NO_STR_MEMORY will it contain a valid string
3281 * length that can be used to resize the buffer.
3282 */
3283#define RTLatin1ToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \
3284 RTLatin1ToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG)
3285
3286/**
3287 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the
3288 * result buffer if requested (custom tag).
3289 *
3290 * @returns iprt status code.
3291 * @param pszString The Latin-1 string to convert.
3292 * @param cchString The maximum size in chars (the type) to convert.
3293 * The conversion stops when it reaches cchString or
3294 * the string terminator ('\\0').
3295 * Use RTSTR_MAX to translate the entire string.
3296 * @param ppwsz If cwc is non-zero, this must point either to a
3297 * pointer to a buffer of the specified size, or to a
3298 * NULL pointer.
3299 * If *ppwsz is NULL or cwc is zero, a buffer of at
3300 * least cwc items will be allocated to hold the
3301 * translated string. If a buffer was requested, it
3302 * must be freed using RTUtf16Free().
3303 * @param cwc The buffer size in RTUTF16s. This includes the
3304 * terminator.
3305 * @param pcwc Where to store the length of the translated string,
3306 * excluding the terminator. (Optional)
3307 *
3308 * This may be set under some error conditions;
3309 * however, only for VERR_BUFFER_OVERFLOW and
3310 * VERR_NO_STR_MEMORY will it contain a valid string
3311 * length that can be used to resize the buffer.
3312 * @param pszTag Allocation tag used for statistics and such.
3313 */
3314RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszString, size_t cchString,
3315 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
3316
3317/** @} */
3318
3319
3320RT_C_DECLS_END
3321
3322/** @} */
3323
3324#endif /* !___iprt_string_h */
3325
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette