VirtualBox

source: vbox/trunk/include/iprt/string.h@ 8150

Last change on this file since 8150 was 7421, checked in by vboxsync, 17 years ago

Reimplemented RTStrICmp so it's actually UTF-8 and not subject to the current locale.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 58.2 KB
Line 
1/** @file
2 * innotek Portable Runtime - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_string_h
27#define ___iprt_string_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/stdarg.h>
32#include <iprt/err.h> /* for VINF_SUCCESS */
33#if defined(RT_OS_LINUX) && defined(__KERNEL__)
34# include <linux/string.h>
35#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
36 /*
37 * Kludge for the FreeBSD kernel:
38 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
39 * it up while including string.h to keep things quiet. It's nothing
40 * important that's clashing, after all.
41 */
42# define strdup strdup_string_h
43# include <string.h>
44# undef strdup
45#elif defined(RT_OS_SOLARIS) && defined(_KERNEL)
46 /*
47 * Same case as with FreeBSD kernel:
48 * The string.h stuff clashes with sys/systm.h
49 * ffs = find first set bit.
50 */
51# define ffs ffs_string_h
52# include <string.h>
53# undef ffs
54# undef strpbrk
55#else
56# include <string.h>
57#endif
58
59/*
60 * Supply prototypes for standard string functions provided by
61 * IPRT instead of the operating environment.
62 */
63#if defined(RT_OS_DARWIN) && defined(KERNEL)
64__BEGIN_DECLS
65void *memchr(const void *pv, int ch, size_t cb);
66char *strpbrk(const char *pszStr, const char *pszChars);
67__END_DECLS
68#endif
69
70
71/** @defgroup grp_rt_str RTStr - String Manipulation
72 * Mostly UTF-8 related helpers where the standard string functions won't do.
73 * @ingroup grp_rt
74 * @{
75 */
76
77__BEGIN_DECLS
78
79
80/**
81 * The maximum string length.
82 */
83#define RTSTR_MAX (~(size_t)0)
84
85
86#ifdef IN_RING3
87
88/**
89 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
90 *
91 * @returns iprt status code.
92 * @param ppszString Receives pointer of allocated native CP string.
93 * The returned pointer must be freed using RTStrFree().
94 * @param pszString UTF-8 string to convert.
95 */
96RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
97
98/**
99 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
100 *
101 * @returns iprt status code.
102 * @param ppszString Receives pointer of allocated UTF-8 string.
103 * The returned pointer must be freed using RTStrFree().
104 * @param pszString Native string to convert.
105 */
106RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
107
108#endif
109
110/**
111 * Free string allocated by any of the non-UCS-2 string functions.
112 *
113 * @returns Nothing (the function is declared void).
114 * @param pszString Pointer to buffer with string to free.
115 * NULL is accepted.
116 */
117RTDECL(void) RTStrFree(char *pszString);
118
119/**
120 * Allocates a new copy of the given UTF-8 string.
121 *
122 * @returns Pointer to the allocated UTF-8 string.
123 * @param pszString UTF-8 string to duplicate.
124 */
125RTDECL(char *) RTStrDup(const char *pszString);
126
127/**
128 * Allocates a new copy of the given UTF-8 string.
129 *
130 * @returns iprt status code.
131 * @param ppszString Receives pointer of the allocated UTF-8 string.
132 * The returned pointer must be freed using RTStrFree().
133 * @param pszString UTF-8 string to duplicate.
134 */
135RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
136
137/**
138 * Gets the number of code points the string is made up of, excluding
139 * the terminator.
140 *
141 *
142 * @returns Number of code points (RTUNICP).
143 * @returns 0 if the string was incorrectly encoded.
144 * @param psz The string.
145 */
146RTDECL(size_t) RTStrUniLen(const char *psz);
147
148/**
149 * Gets the number of code points the string is made up of, excluding
150 * the terminator.
151 *
152 * This function will validate the string, and incorrectly encoded UTF-8
153 * strings will be rejected.
154 *
155 * @returns iprt status code.
156 * @param psz The string.
157 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
158 * @param pcuc Where to store the code point count.
159 * This is undefined on failure.
160 */
161RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
162
163/**
164 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
165 *
166 * @returns iprt status code.
167 * @param pszString UTF-8 string to convert.
168 * @param ppUniString Receives pointer to the allocated unicode string.
169 * The returned string must be freed using RTUniFree().
170 */
171RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
172
173/**
174 * Translates pszString from UTF-8 to an array of code points, allocating the result
175 * array if requested.
176 *
177 * @returns iprt status code.
178 * @param pszString UTF-8 string to convert.
179 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
180 * when it reaches cchString or the string terminator ('\\0').
181 * Use RTSTR_MAX to translate the entire string.
182 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
183 * a buffer of the specified size, or pointer to a NULL pointer.
184 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
185 * will be allocated to hold the translated string.
186 * If a buffer was requested it must be freed using RTUniFree().
187 * @param cCps The number of code points in the unicode string. This includes the terminator.
188 * @param pcCps Where to store the length of the translated string. (Optional)
189 * This field will be updated even on failure, however the value is only
190 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
191 * and VERR_NO_STR_MEMORY it contains the required buffer space.
192 */
193RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
194
195/**
196 * Calculates the length of the string in RTUTF16 items.
197 *
198 * This function will validate the string, and incorrectly encoded UTF-8
199 * strings will be rejected. The primary purpose of this function is to
200 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
201 * other purposes RTStrCalcUtf16LenEx() should be used.
202 *
203 * @returns Number of RTUTF16 items.
204 * @returns 0 if the string was incorrectly encoded.
205 * @param psz The string.
206 */
207RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
208
209/**
210 * Calculates the length of the string in RTUTF16 items.
211 *
212 * This function will validate the string, and incorrectly encoded UTF-8
213 * strings will be rejected.
214 *
215 * @returns iprt status code.
216 * @param psz The string.
217 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
218 * @param pcwc Where to store the string length. Optional.
219 * This is undefined on failure.
220 */
221RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
222
223/**
224 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
225 *
226 * @returns iprt status code.
227 * @param pszString UTF-8 string to convert.
228 * @param ppwszString Receives pointer to the allocated UTF-16 string.
229 * The returned string must be freed using RTUtf16Free().
230 */
231RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
232
233/**
234 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
235 *
236 * @returns iprt status code.
237 * @param pszString UTF-8 string to convert.
238 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
239 * when it reaches cchString or the string terminator ('\\0').
240 * Use RTSTR_MAX to translate the entire string.
241 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
242 * a buffer of the specified size, or pointer to a NULL pointer.
243 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
244 * will be allocated to hold the translated string.
245 * If a buffer was requested it must be freed using RTUtf16Free().
246 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
247 * @param pcwc Where to store the length of the translated string. (Optional)
248 * This field will be updated even on failure, however the value is only
249 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
250 * and VERR_NO_STR_MEMORY it contains the required buffer space.
251 */
252RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
253
254
255/**
256 * Get the unicode code point at the given string position.
257 *
258 * @returns unicode code point.
259 * @returns RTUNICP_INVALID if the encoding is invalid.
260 * @param psz The string.
261 */
262RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
263
264/**
265 * Get the unicode code point at the given string position.
266 *
267 * @returns iprt status code.
268 * @param ppsz Pointer to the string pointer. Updated to point to the char following the current code point.
269 * @param pCp Where to store the unicode code point.
270 * RTUNICP_INVALID is stored here on failure.
271 */
272RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
273
274/**
275 * Put the unicode code point at the given string position
276 * and return the pointer to the char following it.
277 *
278 * This function will not consider anything at or following the
279 * buffer area pointed to by psz. It is therefore not suitable for
280 * inserting code points into a string, only appending/overwriting.
281 *
282 * @returns pointer to the char following the written code point.
283 * @param psz The string.
284 * @param CodePoint The code point to write.
285 * This should not be RTUNICP_INVALID or any other character
286 * out of the UTF-8 range.
287 *
288 * @remark This is a worker function for RTStrPutCp().
289 *
290 */
291RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
292
293/**
294 * Get the unicode code point at the given string position.
295 *
296 * @returns unicode code point.
297 * @returns RTUNICP_INVALID if the encoding is invalid.
298 * @param psz The string.
299 *
300 * @remark We optimize this operation by using an inline function for
301 * the most frequent and simplest sequence, the rest is
302 * handled by RTStrGetCpInternal().
303 */
304DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
305{
306 const unsigned char uch = *(const unsigned char *)psz;
307 if (!(uch & RT_BIT(7)))
308 return uch;
309 return RTStrGetCpInternal(psz);
310}
311
312/**
313 * Get the unicode code point at the given string position.
314 *
315 * @returns iprt status code.
316 * @param ppsz Pointer to the string pointer. This will be updated to
317 * point to the char following the current code point.
318 * @param pCp Where to store the code point.
319 * RTUNICP_INVALID is stored here on failure.
320 *
321 * @remark We optimize this operation by using an inline function for
322 * the most frequent and simplest sequence, the rest is
323 * handled by RTStrGetCpExInternal().
324 */
325DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
326{
327 const unsigned char uch = **(const unsigned char **)ppsz;
328 if (!(uch & RT_BIT(7)))
329 {
330 (*ppsz)++;
331 *pCp = uch;
332 return VINF_SUCCESS;
333 }
334 return RTStrGetCpExInternal(ppsz, pCp);
335}
336
337/**
338 * Put the unicode code point at the given string position
339 * and return the pointer to the char following it.
340 *
341 * This function will not consider anything at or following the the
342 * buffer area pointed to by psz. It is therefore not suitable for
343 * inserting code points into a string, only appending/overwriting.
344 *
345 * @returns pointer to the char following the written code point.
346 * @param psz The string.
347 * @param CodePoint The code point to write.
348 * This sould not be RTUNICP_INVALID or any other charater
349 * out of the UTF-8 range.
350 *
351 * @remark We optimize this operation by using an inline function for
352 * the most frequent and simplest sequence, the rest is
353 * handled by RTStrPutCpInternal().
354 */
355DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
356{
357 if (CodePoint < 0x80)
358 {
359 *psz++ = (unsigned char)CodePoint;
360 return psz;
361 }
362 return RTStrPutCpInternal(psz, CodePoint);
363}
364
365/**
366 * Skips ahead, past the current code point.
367 *
368 * @returns Pointer to the char after the current code point.
369 * @param psz Pointer to the current code point.
370 * @remark This will not move the next valid code point, only past the current one.
371 */
372DECLINLINE(char *) RTStrNextCp(const char *psz)
373{
374 RTUNICP Cp;
375 RTStrGetCpEx(&psz, &Cp);
376 return (char *)psz;
377}
378
379/**
380 * Skips back to the previous code point.
381 *
382 * @returns Pointer to the char before the current code point.
383 * @returns pszStart on failure.
384 * @param pszStart Pointer to the start of the string.
385 * @param psz Pointer to the current code point.
386 */
387RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
388
389
390
391#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
392#define DECLARED_FNRTSTROUTPUT
393/**
394 * Output callback.
395 *
396 * @returns number of bytes written.
397 * @param pvArg User argument.
398 * @param pachChars Pointer to an array of utf-8 characters.
399 * @param cbChars Number of bytes in the character array pointed to by pachChars.
400 */
401typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
402/** Pointer to callback function. */
403typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
404#endif
405
/** Format flag.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/** Mask covering all of the RTSTR_F_xxBIT value width flags below. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag (RTSTR_F_8BIT .. RTSTR_F_128BIT) matching the width of
 * the specified type.
 *
 * sizeof() reports the size in bytes, so it is scaled to bits before being
 * matched against the widths; this assumes 8-bit bytes. Types of any other
 * width evaluate to 0.
 *
 * @param   type    The type to get the width flag for.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
438
439
440/**
441 * Callback to format non-standard format specifiers.
442 *
443 * @returns The number of bytes formatted.
444 * @param pvArg Formatter argument.
445 * @param pfnOutput Pointer to output function.
446 * @param pvArgOutput Argument for the output function.
447 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
448 * after the format specifier.
449 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
450 * @param cchWidth Format Width. -1 if not specified.
451 * @param cchPrecision Format Precision. -1 if not specified.
452 * @param fFlags Flags (RTSTR_F_*).
453 * @param chArgSize The argument size specifier, 'l' or 'L'.
454 */
455typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
456 const char **ppszFormat, va_list *pArgs, int cchWidth,
457 int cchPrecision, unsigned fFlags, char chArgSize);
458/** Pointer to a FNSTRFORMAT() function. */
459typedef FNSTRFORMAT *PFNSTRFORMAT;
460
461
462/**
463 * Partial implementation of a printf like formatter.
464 * It doesn't do everything correct, and there is no floating point support.
465 * However, it supports custom formats by the means of a format callback.
466 *
467 * @returns number of bytes formatted.
468 * @param pfnOutput Output worker.
469 * Called in two ways. Normally with a string and its length.
470 * For termination, it's called with NULL for string, 0 for length.
471 * @param pvArgOutput Argument to the output worker.
472 * @param pfnFormat Custom format worker.
473 * @param pvArgFormat Argument to the format worker.
474 * @param pszFormat Format string pointer.
475 * @param InArgs Argument list.
476 */
477RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list InArgs);
478
479/**
480 * Partial implementation of a printf like formatter.
481 * It doesn't do everything correct, and there is no floating point support.
482 * However, it supports custom formats by the means of a format callback.
483 *
484 * @returns number of bytes formatted.
485 * @param pfnOutput Output worker.
486 * Called in two ways. Normally with a string and its length.
487 * For termination, it's called with NULL for string, 0 for length.
488 * @param pvArgOutput Argument to the output worker.
489 * @param pfnFormat Custom format worker.
490 * @param pvArgFormat Argument to the format worker.
491 * @param pszFormat Format string.
492 * @param ... Argument list.
493 */
494RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
495
496/**
497 * Formats an integer number according to the parameters.
498 *
499 * @returns Length of the formatted number.
500 * @param psz Pointer to output string buffer of sufficient size.
501 * @param u64Value Value to format.
502 * @param uiBase Number representation base.
503 * @param cchWidth Width.
504 * @param cchPrecision Precision.
505 * @param fFlags Flags (RTSTR_F_*).
506 */
507RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
508
509
510/**
511 * Callback for formatting a type.
512 *
513 * This is registered using the RTStrFormatTypeRegister function and will
514 * be called during string formatting to handle the specified %R[type].
515 * The argument for this format type is assumed to be a pointer and it's
516 * passed in the @a pvValue argument.
517 *
518 * @returns Length of the formatted output.
519 * @param pfnOutput Output worker.
520 * @param pvArgOutput Argument to the output worker.
521 * @param pszType The type name.
522 * @param pvValue The argument value.
523 * @param cchWidth Width.
524 * @param cchPrecision Precision.
525 * @param fFlags Flags (RTSTR_F_*).
526 * @param pvUser The user argument.
527 */
528typedef DECLCALLBACK(size_t) FNRTSTRFORMATTYPE(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
529 const char *pszType, void const *pvValue,
530 int cchWidth, int cchPrecision, unsigned fFlags,
531 void *pvUser);
532/** Pointer to a FNRTSTRFORMATTYPE. */
533typedef FNRTSTRFORMATTYPE *PFNRTSTRFORMATTYPE;
534
535
536/**
537 * Register a format handler for a type.
538 *
539 * The format handler is used to handle '%R[type]' format types, where the argument
540 * in the vector is a pointer value (a bit restrictive, but keeps it simple).
541 *
542 * The caller must ensure that no other thread will be making use of any of
543 * the dynamic formatting type facilities simultaneously with this call.
544 *
545 * @returns IPRT status code.
546 * @retval VINF_SUCCESS on success.
547 * @retval VERR_ALREADY_EXISTS if the type has already been registered.
548 * @retval VERR_TOO_MANY_OPEN_FILES if all the type slots have been allocated already.
549 *
550 * @param pszType The type name.
551 * @param pfnHandler The handler address. See FNRTSTRFORMATTYPE for details.
552 * @param pvUser The user argument to pass to the handler. See RTStrFormatTypeSetUser
553 * for how to update this later.
554 */
555RTDECL(int) RTStrFormatTypeRegister(const char *pszType, PFNRTSTRFORMATTYPE pfnHandler, void *pvUser);
556
557/**
558 * Deregisters a format type.
559 *
560 * The caller must ensure that no other thread will be making use of any of
561 * the dynamic formatting type facilities simultaneously with this call.
562 *
563 * @returns IPRT status code.
564 * @retval VINF_SUCCESS on success.
565 * @retval VERR_FILE_NOT_FOUND if not found.
566 *
567 * @param pszType The type to deregister.
568 */
569RTDECL(int) RTStrFormatTypeDeregister(const char *pszType);
570
571/**
572 * Sets the user argument for a type.
573 *
574 * This can be used if a user argument needs relocating in GC.
575 *
576 * @returns IPRT status code.
577 * @retval VINF_SUCCESS on success.
578 * @retval VERR_FILE_NOT_FOUND if not found.
579 *
580 * @param pszType The type to update.
581 * @param pvUser The new user argument value.
582 */
583RTDECL(int) RTStrFormatTypeSetUser(const char *pszType, void *pvUser);
584
585
586/**
587 * String printf.
588 *
589 * @returns The length of the returned string (in pszBuffer).
590 * @param pszBuffer Output buffer.
591 * @param cchBuffer Size of the output buffer.
592 * @param pszFormat The format string.
593 * @param args The format argument.
594 */
595RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
596
597/**
598 * String printf.
599 *
600 * @returns The length of the returned string (in pszBuffer).
601 * @param pszBuffer Output buffer.
602 * @param cchBuffer Size of the output buffer.
603 * @param pszFormat The format string.
604 * @param ... The format argument.
605 */
606RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
607
608
609/**
610 * String printf with custom formatting.
611 *
612 * @returns The length of the returned string (in pszBuffer).
613 * @param pfnFormat Pointer to handler function for the custom formats.
614 * @param pvArg Argument to the pfnFormat function.
615 * @param pszBuffer Output buffer.
616 * @param cchBuffer Size of the output buffer.
617 * @param pszFormat The format string.
618 * @param args The format argument.
619 */
620RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
621
622/**
623 * String printf with custom formatting.
624 *
625 * @returns The length of the returned string (in pszBuffer).
626 * @param pfnFormat Pointer to handler function for the custom formats.
627 * @param pvArg Argument to the pfnFormat function.
628 * @param pszBuffer Output buffer.
629 * @param cchBuffer Size of the output buffer.
630 * @param pszFormat The format string.
631 * @param ... The format argument.
632 */
633RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
634
635
636/**
637 * Allocating string printf.
638 *
639 * @returns The length of the string in the returned *ppszBuffer.
640 * @returns -1 on failure.
641 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
642 * The buffer should be freed using RTStrFree().
643 * On failure *ppszBuffer will be set to NULL.
644 * @param pszFormat The format string.
645 * @param args The format argument.
646 */
647RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
648
649/**
650 * Allocating string printf.
651 *
652 * @returns The length of the string in the returned *ppszBuffer.
653 * @returns -1 on failure.
654 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
655 * The buffer should be freed using RTStrFree().
656 * On failure *ppszBuffer will be set to NULL.
657 * @param pszFormat The format string.
658 * @param ... The format argument.
659 */
660RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
661
662
663/**
664 * Strips blankspaces from both ends of the string.
665 *
666 * @returns Pointer to first non-blank char in the string.
667 * @param psz The string to strip.
668 */
669RTDECL(char *) RTStrStrip(char *psz);
670
671/**
672 * Strips blankspaces from the start of the string.
673 *
674 * @returns Pointer to first non-blank char in the string.
675 * @param psz The string to strip.
676 */
677RTDECL(char *) RTStrStripL(const char *psz);
678
679/**
680 * Strips blankspaces from the end of the string.
681 *
682 * @returns psz.
683 * @param psz The string to strip.
684 */
685RTDECL(char *) RTStrStripR(char *psz);
686
687
688/** @defgroup rt_str_conv String To/From Number Conversions
689 * @ingroup grp_rt_str
690 * @{ */
691
692/**
693 * Converts a string representation of a number to a 64-bit unsigned number.
694 *
695 * @returns iprt status code.
696 * Warnings are used to indicate conversion problems.
697 * @retval VWRN_NUMBER_TOO_BIG
698 * @retval VWRN_NEGATIVE_UNSIGNED
699 * @retval VWRN_TRAILING_CHARS
700 * @retval VWRN_TRAILING_SPACES
701 * @retval VINF_SUCCESS
702 * @retval VERR_NO_DIGITS
703 *
704 * @param pszValue Pointer to the string value.
705 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
706 * @param uBase The base of the representation used.
707 * If 0 the function will look for known prefixes before defaulting to 10.
708 * @param pu64 Where to store the converted number. (optional)
709 */
710RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
711
712/**
713 * Converts a string representation of a number to a 64-bit unsigned number,
714 * making sure the full string is converted.
715 *
716 * @returns iprt status code.
717 * Warnings are used to indicate conversion problems.
718 * @retval VWRN_NUMBER_TOO_BIG
719 * @retval VWRN_NEGATIVE_UNSIGNED
720 * @retval VINF_SUCCESS
721 * @retval VERR_NO_DIGITS
722 * @retval VERR_TRAILING_SPACES
723 * @retval VERR_TRAILING_CHARS
724 *
725 * @param pszValue Pointer to the string value.
726 * @param uBase The base of the representation used.
727 * If 0 the function will look for known prefixes before defaulting to 10.
728 * @param pu64 Where to store the converted number. (optional)
729 */
730RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64);
731
732/**
733 * Converts a string representation of a number to a 64-bit unsigned number.
734 * The base is guessed.
735 *
736 * @returns 64-bit unsigned number on success.
737 * @returns 0 on failure.
738 * @param pszValue Pointer to the string value.
739 */
740RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
741
742/**
743 * Converts a string representation of a number to a 32-bit unsigned number.
744 *
745 * @returns iprt status code.
746 * Warnings are used to indicate conversion problems.
747 * @retval VWRN_NUMBER_TOO_BIG
748 * @retval VWRN_NEGATIVE_UNSIGNED
749 * @retval VWRN_TRAILING_CHARS
750 * @retval VWRN_TRAILING_SPACES
751 * @retval VINF_SUCCESS
752 * @retval VERR_NO_DIGITS
753 *
754 * @param pszValue Pointer to the string value.
755 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
756 * @param uBase The base of the representation used.
757 * If 0 the function will look for known prefixes before defaulting to 10.
758 * @param pu32 Where to store the converted number. (optional)
759 */
760RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
761
762/**
763 * Converts a string representation of a number to a 32-bit unsigned number,
764 * making sure the full string is converted.
765 *
766 * @returns iprt status code.
767 * Warnings are used to indicate conversion problems.
768 * @retval VWRN_NUMBER_TOO_BIG
769 * @retval VWRN_NEGATIVE_UNSIGNED
770 * @retval VINF_SUCCESS
771 * @retval VERR_NO_DIGITS
772 * @retval VERR_TRAILING_SPACES
773 * @retval VERR_TRAILING_CHARS
774 *
775 * @param pszValue Pointer to the string value.
776 * @param uBase The base of the representation used.
777 * If 0 the function will look for known prefixes before defaulting to 10.
778 * @param pu32 Where to store the converted number. (optional)
779 */
780RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32);
781
782/**
783 * Converts a string representation of a number to a 32-bit unsigned number.
784 * The base is guessed.
785 *
786 * @returns 32-bit unsigned number on success.
787 * @returns 0 on failure.
788 * @param pszValue Pointer to the string value.
789 */
790RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
791
792/**
793 * Converts a string representation of a number to a 16-bit unsigned number.
794 *
795 * @returns iprt status code.
796 * Warnings are used to indicate conversion problems.
797 * @retval VWRN_NUMBER_TOO_BIG
798 * @retval VWRN_NEGATIVE_UNSIGNED
799 * @retval VWRN_TRAILING_CHARS
800 * @retval VWRN_TRAILING_SPACES
801 * @retval VINF_SUCCESS
802 * @retval VERR_NO_DIGITS
803 *
804 * @param pszValue Pointer to the string value.
805 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
806 * @param uBase The base of the representation used.
807 * If 0 the function will look for known prefixes before defaulting to 10.
808 * @param pu16 Where to store the converted number. (optional)
809 */
810RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
811
812/**
813 * Converts a string representation of a number to a 16-bit unsigned number,
814 * making sure the full string is converted.
815 *
816 * @returns iprt status code.
817 * Warnings are used to indicate conversion problems.
818 * @retval VWRN_NUMBER_TOO_BIG
819 * @retval VWRN_NEGATIVE_UNSIGNED
820 * @retval VINF_SUCCESS
821 * @retval VERR_NO_DIGITS
822 * @retval VERR_TRAILING_SPACES
823 * @retval VERR_TRAILING_CHARS
824 *
825 * @param pszValue Pointer to the string value.
826 * @param uBase The base of the representation used.
827 * If 0 the function will look for known prefixes before defaulting to 10.
828 * @param pu16 Where to store the converted number. (optional)
829 */
830RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16);
831
832/**
833 * Converts a string representation of a number to a 16-bit unsigned number.
834 * The base is guessed.
835 *
836 * @returns 16-bit unsigned number on success.
837 * @returns 0 on failure.
838 * @param pszValue Pointer to the string value.
839 */
840RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
841
842/**
843 * Converts a string representation of a number to a 8-bit unsigned number.
844 *
845 * @returns iprt status code.
846 * Warnings are used to indicate conversion problems.
847 * @retval VWRN_NUMBER_TOO_BIG
848 * @retval VWRN_NEGATIVE_UNSIGNED
849 * @retval VWRN_TRAILING_CHARS
850 * @retval VWRN_TRAILING_SPACES
851 * @retval VINF_SUCCESS
852 * @retval VERR_NO_DIGITS
853 *
854 * @param pszValue Pointer to the string value.
855 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
856 * @param uBase The base of the representation used.
857 * If 0 the function will look for known prefixes before defaulting to 10.
858 * @param pu8 Where to store the converted number. (optional)
859 */
860RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
861
862/**
863 * Converts a string representation of a number to a 8-bit unsigned number,
864 * making sure the full string is converted.
865 *
866 * @returns iprt status code.
867 * Warnings are used to indicate conversion problems.
868 * @retval VWRN_NUMBER_TOO_BIG
869 * @retval VWRN_NEGATIVE_UNSIGNED
870 * @retval VINF_SUCCESS
871 * @retval VERR_NO_DIGITS
872 * @retval VERR_TRAILING_SPACES
873 * @retval VERR_TRAILING_CHARS
874 *
875 * @param pszValue Pointer to the string value.
876 * @param uBase The base of the representation used.
877 * If 0 the function will look for known prefixes before defaulting to 10.
878 * @param pu8 Where to store the converted number. (optional)
879 */
880RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8);
881
882/**
883 * Converts a string representation of a number to a 8-bit unsigned number.
884 * The base is guessed.
885 *
886 * @returns 8-bit unsigned number on success.
887 * @returns 0 on failure.
888 * @param pszValue Pointer to the string value.
889 */
890RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
891
892/**
893 * Converts a string representation of a number to a 64-bit signed number.
894 *
895 * @returns iprt status code.
896 * Warnings are used to indicate conversion problems.
897 * @retval VWRN_NUMBER_TOO_BIG
898 * @retval VWRN_TRAILING_CHARS
899 * @retval VWRN_TRAILING_SPACES
900 * @retval VINF_SUCCESS
901 * @retval VERR_NO_DIGITS
902 *
903 * @param pszValue Pointer to the string value.
904 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
905 * @param uBase The base of the representation used.
906 * If 0 the function will look for known prefixes before defaulting to 10.
907 * @param pi64 Where to store the converted number. (optional)
908 */
909RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
910
911/**
912 * Converts a string representation of a number to a 64-bit signed number,
913 * making sure the full string is converted.
914 *
915 * @returns iprt status code.
916 * Warnings are used to indicate conversion problems.
917 * @retval VWRN_NUMBER_TOO_BIG
918 * @retval VINF_SUCCESS
919 * @retval VERR_TRAILING_CHARS
920 * @retval VERR_TRAILING_SPACES
921 * @retval VERR_NO_DIGITS
922 *
923 * @param pszValue Pointer to the string value.
924 * @param uBase The base of the representation used.
925 * If 0 the function will look for known prefixes before defaulting to 10.
926 * @param pi64 Where to store the converted number. (optional)
927 */
928RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64);
929
930/**
931 * Converts a string representation of a number to a 64-bit signed number.
932 * The base is guessed.
933 *
934 * @returns 64-bit signed number on success.
935 * @returns 0 on failure.
936 * @param pszValue Pointer to the string value.
937 */
938RTDECL(int64_t) RTStrToInt64(const char *pszValue);
939
940/**
941 * Converts a string representation of a number to a 32-bit signed number.
942 *
943 * @returns iprt status code.
944 * Warnings are used to indicate conversion problems.
945 * @retval VWRN_NUMBER_TOO_BIG
946 * @retval VWRN_TRAILING_CHARS
947 * @retval VWRN_TRAILING_SPACES
948 * @retval VINF_SUCCESS
949 * @retval VERR_NO_DIGITS
950 *
951 * @param pszValue Pointer to the string value.
952 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
953 * @param uBase The base of the representation used.
954 * If 0 the function will look for known prefixes before defaulting to 10.
955 * @param pi32 Where to store the converted number. (optional)
956 */
957RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
958
959/**
960 * Converts a string representation of a number to a 32-bit signed number,
961 * making sure the full string is converted.
962 *
963 * @returns iprt status code.
964 * Warnings are used to indicate conversion problems.
965 * @retval VWRN_NUMBER_TOO_BIG
966 * @retval VINF_SUCCESS
967 * @retval VERR_TRAILING_CHARS
968 * @retval VERR_TRAILING_SPACES
969 * @retval VERR_NO_DIGITS
970 *
971 * @param pszValue Pointer to the string value.
972 * @param uBase The base of the representation used.
973 * If 0 the function will look for known prefixes before defaulting to 10.
974 * @param pi32 Where to store the converted number. (optional)
975 */
976RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32);
977
978/**
979 * Converts a string representation of a number to a 32-bit signed number.
980 * The base is guessed.
981 *
982 * @returns 32-bit signed number on success.
983 * @returns 0 on failure.
984 * @param pszValue Pointer to the string value.
985 */
986RTDECL(int32_t) RTStrToInt32(const char *pszValue);
987
988/**
989 * Converts a string representation of a number to a 16-bit signed number.
990 *
991 * @returns iprt status code.
992 * Warnings are used to indicate conversion problems.
993 * @retval VWRN_NUMBER_TOO_BIG
994 * @retval VWRN_TRAILING_CHARS
995 * @retval VWRN_TRAILING_SPACES
996 * @retval VINF_SUCCESS
997 * @retval VERR_NO_DIGITS
998 *
999 * @param pszValue Pointer to the string value.
1000 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
1001 * @param uBase The base of the representation used.
1002 * If 0 the function will look for known prefixes before defaulting to 10.
1003 * @param pi16 Where to store the converted number. (optional)
1004 */
1005RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
1006
1007/**
1008 * Converts a string representation of a number to a 16-bit signed number,
1009 * making sure the full string is converted.
1010 *
1011 * @returns iprt status code.
1012 * Warnings are used to indicate conversion problems.
1013 * @retval VWRN_NUMBER_TOO_BIG
1014 * @retval VINF_SUCCESS
1015 * @retval VERR_TRAILING_CHARS
1016 * @retval VERR_TRAILING_SPACES
1017 * @retval VERR_NO_DIGITS
1018 *
1019 * @param pszValue Pointer to the string value.
1020 * @param uBase The base of the representation used.
1021 * If 0 the function will look for known prefixes before defaulting to 10.
1022 * @param pi16 Where to store the converted number. (optional)
1023 */
1024RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16);
1025
1026/**
1027 * Converts a string representation of a number to a 16-bit signed number.
1028 * The base is guessed.
1029 *
1030 * @returns 16-bit signed number on success.
1031 * @returns 0 on failure.
1032 * @param pszValue Pointer to the string value.
1033 */
1034RTDECL(int16_t) RTStrToInt16(const char *pszValue);
1035
1036/**
1037 * Converts a string representation of a number to a 8-bit signed number.
1038 *
1039 * @returns iprt status code.
1040 * Warnings are used to indicate conversion problems.
1041 * @retval VWRN_NUMBER_TOO_BIG
1042 * @retval VWRN_TRAILING_CHARS
1043 * @retval VWRN_TRAILING_SPACES
1044 * @retval VINF_SUCCESS
1045 * @retval VERR_NO_DIGITS
1046 *
1047 * @param pszValue Pointer to the string value.
1048 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
1049 * @param uBase The base of the representation used.
1050 * If 0 the function will look for known prefixes before defaulting to 10.
1051 * @param pi8 Where to store the converted number. (optional)
1052 */
1053RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
1054
1055/**
1056 * Converts a string representation of a number to a 8-bit signed number,
1057 * making sure the full string is converted.
1058 *
1059 * @returns iprt status code.
1060 * Warnings are used to indicate conversion problems.
1061 * @retval VWRN_NUMBER_TOO_BIG
1062 * @retval VINF_SUCCESS
1063 * @retval VERR_TRAILING_CHARS
1064 * @retval VERR_TRAILING_SPACES
1065 * @retval VERR_NO_DIGITS
1066 *
1067 * @param pszValue Pointer to the string value.
1068 * @param uBase The base of the representation used.
1069 * If 0 the function will look for known prefixes before defaulting to 10.
1070 * @param pi8 Where to store the converted number. (optional)
1071 */
1072RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8);
1073
1074/**
1075 * Converts a string representation of a number to a 8-bit signed number.
1076 * The base is guessed.
1077 *
1078 * @returns 8-bit signed number on success.
1079 * @returns 0 on failure.
1080 * @param pszValue Pointer to the string value.
1081 */
1082RTDECL(int8_t) RTStrToInt8(const char *pszValue);
1083
1084/**
1085 * Performs a case sensitive string compare between two UTF-8 strings.
1086 *
1087 * Encoding errors are ignored by the current implementation. So, the only
1088 * difference between this and the CRT strcmp function is the handling of
1089 * NULL arguments.
1090 *
1091 * @returns < 0 if the first string less than the second string.
1092 * @returns 0 if the first string identical to the second string.
1093 * @returns > 0 if the first string greater than the second string.
1094 * @param psz1 First UTF-8 string. Null is allowed.
1095 * @param psz2 Second UTF-8 string. Null is allowed.
1096 */
1097RTDECL(int) RTStrCmp(const char *psz1, const char *psz2);
1098
1099/**
1100 * Performs a case insensitive string compare between two UTF-8 strings.
1101 *
1102 * This is a simplified compare, as only the simplified lower/upper case folding
1103 * specified by the unicode specs are used. It does not consider character pairs
1104 * as they are used in some languages, just simple upper & lower case compares.
1105 *
1106 * The result is the difference between the mismatching codepoints after they
1107 * both have been lower cased.
1108 *
1109 * If the string encoding is invalid the function will assert (strict builds)
1110 * and use RTStrCmp for the remainder of the string.
1111 *
1112 * @returns < 0 if the first string less than the second string.
1113 * @returns 0 if the first string identical to the second string.
1114 * @returns > 0 if the first string greater than the second string.
1115 * @param psz1 First UTF-8 string. Null is allowed.
1116 * @param psz2 Second UTF-8 string. Null is allowed.
1117 */
1118RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
1119
1120/** @} */
1121
1122
1123/** @defgroup rt_str_space Unique String Space
1124 * @ingroup grp_rt_str
1125 * @{
1126 */
1127
1128/** Pointer to a string name space container node core. */
1129typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
1130/** Pointer to a pointer to a string name space container node core. */
1131typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
1132
1133/**
1134 * String name space container node core.
1135 */
1136typedef struct RTSTRSPACECORE
1137{
1138 /** Hash key. Don't touch. */
1139 uint32_t Key;
1140 /** Pointer to the left leaf node. Don't touch. */
1141 PRTSTRSPACECORE pLeft;
1142 /** Pointer to the right leaf node. Don't touch. */
1143 PRTSTRSPACECORE pRight;
1144 /** Pointer to the list of strings with the same key. Don't touch. */
1145 PRTSTRSPACECORE pList;
1146 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
1147 unsigned char uchHeight;
1148 /** The string length. Read only! */
1149 size_t cchString;
1150 /** Pointer to the string. Read only! */
1151 const char * pszString;
1152} RTSTRSPACECORE;
1153
1154/** String space. (Initialize with NULL.) */
1155typedef PRTSTRSPACECORE RTSTRSPACE;
1156/** Pointer to a string space. */
1157typedef PPRTSTRSPACECORE PRTSTRSPACE;
1158
1159
1160/**
1161 * Inserts a string into a unique string space.
1162 *
1163 * @returns true on success.
1164 * @returns false if the string collided with an existing string.
1165 * @param pStrSpace The space to insert it into.
1166 * @param pStr The string node.
1167 */
1168RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
1169
1170/**
1171 * Removes a string from a unique string space.
1172 *
1173 * @returns Pointer to the removed string node.
1174 * @returns NULL if the string was not found in the string space.
1175 * @param pStrSpace The space to remove it from.
1176 * @param pszString The string to remove.
1177 */
1178RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
1179
1180/**
1181 * Gets a string from a unique string space.
1182 *
1183 * @returns Pointer to the string node.
1184 * @returns NULL if the string was not found in the string space.
1185 * @param pStrSpace The space to search.
1186 * @param pszString The string to get.
1187 */
1188RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
1189
1190/**
1191 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
1192 *
1193 * @returns 0 on continue.
1194 * @returns Non-zero to abort the operation.
1195 * @param pStr The string node
1196 * @param pvUser The user specified argument.
1197 */
1198typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
1199/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
1200typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
1201
1202/**
1203 * Destroys the string space.
1204 * The caller supplies a callback which will be called for each of
1205 * the string nodes for freeing their memory and other resources.
1206 *
1207 * @returns 0 or what ever non-zero return value pfnCallback returned
1208 * when aborting the destruction.
1209 * @param pStrSpace The space to destroy.
1210 * @param pfnCallback The callback.
1211 * @param pvUser The user argument.
1212 */
1213RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1214
1215/**
1216 * Enumerates the string space.
1217 * The caller supplies a callback which will be called for each of
1218 * the string nodes.
1219 *
1220 * @returns 0 or what ever non-zero return value pfnCallback returned
1221 * when aborting the enumeration.
1222 * @param pStrSpace The space to enumerate.
1223 * @param pfnCallback The callback.
1224 * @param pvUser The user argument.
1225 */
1226RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1227
1228/** @} */
1229
1230
1231/** @defgroup rt_str_utf16 UTF-16 String Manipulation
1232 * @ingroup grp_rt_str
1233 * @{
1234 */
1235
1236/**
1237 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
1238 * RTUtf16Dup() or RTUtf16DupEx().
1239 *
1240 * @returns Nothing (the function is declared void).
1241 * @param pwszString The UTF-16 string to free. NULL is accepted.
1242 */
1243RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
1244
1245/**
1246 * Allocates a new copy of the specified UTF-16 string.
1247 *
1248 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
1249 * @returns NULL when out of memory.
1250 * @param pwszString UTF-16 string to duplicate.
1251 * @remark This function will not make any attempt to validate the encoding.
1252 */
1253RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
1254
1255/**
1256 * Allocates a new copy of the specified UTF-16 string.
1257 *
1258 * @returns iprt status code.
1259 * @param ppwszString Receives pointer of the allocated UTF-16 string.
1260 * The returned pointer must be freed using RTUtf16Free().
1261 * @param pwszString UTF-16 string to duplicate.
1262 * @param cwcExtra Number of extra RTUTF16 items to allocate.
1263 * @remark This function will not make any attempt to validate the encoding.
1264 */
1265RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
1266
1267/**
1268 * Returns the length of a UTF-16 string in UTF-16 characters
1269 * without trailing '\\0'.
1270 *
1271 * Surrogate pairs counts as two UTF-16 characters here. Use RTUtf16CpCnt()
1272 * to get the exact number of code points in the string.
1273 *
1274 * @returns The number of RTUTF16 items in the string.
1275 * @param pwszString Pointer to the UTF-16 string.
1276 * @remark This function will not make any attempt to validate the encoding.
1277 */
1278RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
1279
1280/**
1281 * Performs a case sensitive string compare between two UTF-16 strings.
1282 *
1283 * @returns < 0 if the first string less than the second string.
1284 * @returns 0 if the first string identical to the second string.
1285 * @returns > 0 if the first string greater than the second string.
1286 * @param pwsz1 First UTF-16 string. Null is allowed.
1287 * @param pwsz2 Second UTF-16 string. Null is allowed.
1288 * @remark This function will not make any attempt to validate the encoding.
1289 */
1290RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
1291
1292/**
1293 * Performs a case insensitive string compare between two UTF-16 strings.
1294 *
1295 * This is a simplified compare, as only the simplified lower/upper case folding
1296 * specified by the unicode specs are used. It does not consider character pairs
1297 * as they are used in some languages, just simple upper & lower case compares.
1298 *
1299 * @returns < 0 if the first string less than the second string.
1300 * @returns 0 if the first string identical to the second string.
1301 * @returns > 0 if the first string greater than the second string.
1302 * @param pwsz1 First UTF-16 string. Null is allowed.
1303 * @param pwsz2 Second UTF-16 string. Null is allowed.
1304 */
1305RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1306
1307/**
1308 * Performs a case insensitive string compare between two UTF-16 strings
1309 * using the current locale of the process (if applicable).
1310 *
1311 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
1312 * required data is available, to do a correct case-insensitive compare. It
1313 * follows that it is more complex and thereby likely to be more expensive.
1314 *
1315 * @returns < 0 if the first string less than the second string.
1316 * @returns 0 if the first string identical to the second string.
1317 * @returns > 0 if the first string greater than the second string.
1318 * @param pwsz1 First UTF-16 string. Null is allowed.
1319 * @param pwsz2 Second UTF-16 string. Null is allowed.
1320 */
1321RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1322
1323/**
1324 * Folds a UTF-16 string to lowercase.
1325 *
1326 * This is a very simple folding; it uses the simple lowercase
1327 * code point, it is not related to any locale just the most common
1328 * lowercase codepoint setup by the unicode specs, and it will not
1329 * create new surrogate pairs or remove existing ones.
1330 *
1331 * @returns Pointer to the passed in string.
1332 * @param pwsz The string to fold.
1333 */
1334RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1335
1336/**
1337 * Folds a UTF-16 string to uppercase.
1338 *
1339 * This is a very simple folding; it uses the simple uppercase
1340 * code point, it is not related to any locale just the most common
1341 * uppercase codepoint setup by the unicode specs, and it will not
1342 * create new surrogate pairs or remove existing ones.
1343 *
1344 * @returns Pointer to the passed in string.
1345 * @param pwsz The string to fold.
1346 */
1347RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1348
1349/**
1350 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
1351 *
1352 * @returns iprt status code.
1353 * @param pwszString UTF-16 string to convert.
1354 * @param ppszString Receives pointer of allocated UTF-8 string.
1355 * The returned pointer must be freed using RTStrFree().
1356 */
1357RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1358
1359/**
1360 * Translates UTF-16 to UTF-8 using buffer provided by the caller or
1361 * a fittingly sized buffer allocated by the function.
1362 *
1363 * @returns iprt status code.
1364 * @param pwszString The UTF-16 string to convert.
1365 * @param cwcString The number of RTUTF16 items to translate from pwszString.
1366 * The translation will stop when reaching cwcString or the terminator ('\\0').
1367 * Use RTSTR_MAX to translate the entire string.
1368 * @param ppsz If cch is non-zero, this must either be pointing to pointer to
1369 * a buffer of the specified size, or pointer to a NULL pointer.
1370 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
1371 * will be allocated to hold the translated string.
1372 * If a buffer was requested it must be freed using RTStrFree().
1373 * @param cch The buffer size in chars (the type). This includes the terminator.
1374 * @param pcch Where to store the length of the translated string. (Optional)
1375 * This field will be updated even on failure, however the value is only
1376 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
1377 * and VERR_NO_STR_MEMORY it contains the required buffer space.
1378 */
1379RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1380
1381/**
1382 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1383 *
1384 * This function will validate the string, and incorrectly encoded UTF-16
1385 * strings will be rejected. The primary purpose of this function is to
1386 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most
1387 * other purposes RTUtf16ToUtf8Ex() should be used.
1388 *
1389 * @returns Number of char (bytes).
1390 * @returns 0 if the string was incorrectly encoded.
1391 * @param pwsz The UTF-16 string.
1392 */
1393RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz);
1394
1395/**
1396 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1397 *
1398 * This function will validate the string, and incorrectly encoded UTF-16
1399 * strings will be rejected.
1400 *
1401 * @returns iprt status code.
1402 * @param pwsz The string.
1403 * @param cwc The max string length. Use RTSTR_MAX to process the entire string.
1404 * @param pcch Where to store the string length (in bytes). Optional.
1405 * This is undefined on failure.
1406 */
1407RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
1408
1409/**
1410 * Get the unicode code point at the given string position.
1411 *
1412 * @returns unicode code point.
1413 * @returns RTUNICP_INVALID if the encoding is invalid.
1414 * @param pwsz The string.
1415 *
1416 * @remark This is an internal worker for RTUtf16GetCp().
1417 */
1418RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1419
1420/**
1421 * Get the unicode code point at the given string position.
1422 *
1423 * @returns iprt status code.
1424 * @param ppwsz Pointer to the string pointer. This will be updated to
1425 * point to the char following the current code point.
1426 * @param pCp Where to store the code point.
1427 * RTUNICP_INVALID is stored here on failure.
1428 *
1429 * @remark This is an internal worker for RTUtf16GetCpEx().
1430 */
1431RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1432
1433/**
1434 * Put the unicode code point at the given string position
1435 * and return the pointer to the char following it.
1436 *
1437 * This function will not consider anything at or following the
1438 * buffer area pointed to by pwsz. It is therefore not suitable for
1439 * inserting code points into a string, only appending/overwriting.
1440 *
1441 * @returns pointer to the char following the written code point.
1442 * @param pwsz The string.
1443 * @param CodePoint The code point to write.
1444 * This should not be RTUNICP_INVALID or any other character
1445 * out of the UTF-16 range.
1446 *
1447 * @remark This is an internal worker for RTUtf16PutCp().
1448 */
1449RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1450
1451/**
1452 * Get the unicode code point at the given string position.
1453 *
1454 * @returns unicode code point.
1455 * @returns RTUNICP_INVALID if the encoding is invalid.
1456 * @param pwsz The string.
1457 *
1458 * @remark We optimize this operation by using an inline function for
1459 * everything which isn't a surrogate pair or an endian indicator.
1460 */
1461DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1462{
1463 const RTUTF16 wc = *pwsz;
1464 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1465 return wc;
1466 return RTUtf16GetCpInternal(pwsz);
1467}
1468
1469/**
1470 * Get the unicode code point at the given string position.
1471 *
1472 * @returns iprt status code.
1473 * @param ppwsz Pointer to the string pointer. This will be updated to
1474 * point to the char following the current code point.
1475 * @param pCp Where to store the code point.
1476 * RTUNICP_INVALID is stored here on failure.
1477 *
1478 * @remark We optimize this operation by using an inline function for
1479 * everything which isn't a surrogate pair or and endian indicator.
1480 */
1481DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1482{
1483 const RTUTF16 wc = **ppwsz;
1484 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1485 {
1486 (*ppwsz)++;
1487 *pCp = wc;
1488 return VINF_SUCCESS;
1489 }
1490 return RTUtf16GetCpExInternal(ppwsz, pCp);
1491}
1492
1493/**
1494 * Put the unicode code point at the given string position
1495 * and return the pointer to the char following it.
1496 *
1497 * This function will not consider anything at or following the the
1498 * buffer area pointed to by pwsz. It is therefore not suitable for
1499 * inserting code points into a string, only appending/overwriting.
1500 *
1501 * @returns pointer to the char following the written code point.
1502 * @param pwsz The string.
1503 * @param CodePoint The code point to write.
1504 * This sould not be RTUNICP_INVALID or any other charater
1505 * out of the UTF-16 range.
1506 *
1507 * @remark We optimize this operation by using an inline function for
1508 * everything which isn't a surrogate pair or and endian indicator.
1509 */
1510DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1511{
1512 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
1513 {
1514 *pwsz++ = (RTUTF16)CodePoint;
1515 return pwsz;
1516 }
1517 return RTUtf16PutCpInternal(pwsz, CodePoint);
1518}
1519
1520/**
1521 * Skips ahead, past the current code point.
1522 *
1523 * @returns Pointer to the char after the current code point.
1524 * @param pwsz Pointer to the current code point.
1525 * @remark This will not move the next valid code point, only past the current one.
1526 */
1527DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1528{
1529 RTUNICP Cp;
1530 RTUtf16GetCpEx(&pwsz, &Cp);
1531 return (PRTUTF16)pwsz;
1532}
1533
1534/**
1535 * Skips backwards, to the previous code point.
1536 *
1537 * @returns Pointer to the start of the previous code point.
1538 * @param pwszStart Pointer to the start of the string.
1539 * @param pwsz Pointer to the current code point.
1540 */
1541RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1542
1543
1544/**
1545 * Checks if the UTF-16 char is the high surrogate char (i.e.
1546 * the 1st char in the pair).
1547 *
1548 * @returns true if it is.
1549 * @returns false if it isn't.
1550 * @param wc The character to investigate.
1551 */
1552DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1553{
1554 return wc >= 0xd800 && wc <= 0xdbff;
1555}
1556
1557/**
1558 * Checks if the UTF-16 char is the low surrogate char (i.e.
1559 * the 2nd char in the pair).
1560 *
1561 * @returns true if it is.
1562 * @returns false if it isn't.
1563 * @param wc The character to investigate.
1564 */
1565DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1566{
1567 return wc >= 0xdc00 && wc <= 0xdfff;
1568}
1569
1570
1571/**
1572 * Checks if the two UTF-16 chars form a valid surrogate pair.
1573 *
1574 * @returns true if they do.
1575 * @returns false if they doesn't.
1576 * @param wcHigh The high (1st) character.
1577 * @param wcLow The low (2nd) character.
1578 */
1579DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1580{
1581 return RTUtf16IsHighSurrogate(wcHigh)
1582 && RTUtf16IsLowSurrogate(wcLow);
1583}
1584
1585/** @} */
1586
1587__END_DECLS
1588
1589/** @} */
1590
1591#endif
1592
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette