VirtualBox

source: vbox/trunk/include/iprt/latin1.h@ 77807

Last change on this file since 77807 was 76585, checked in by vboxsync, 6 years ago

*: scm --fix-header-guard-endif

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.4 KB
RevLine 
[1]1/** @file
[57941]2 * IPRT - String Manipulation, Latin-1 (ISO-8859-1) encoding.
[1]3 */
4
5/*
[76553]6 * Copyright (C) 2006-2019 Oracle Corporation
[1]7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
[5999]11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
[1]24 */
25
[76557]26#ifndef IPRT_INCLUDED_latin1_h
27#define IPRT_INCLUDED_latin1_h
[76507]28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
[1]31
[76417]32#include <iprt/assert.h>
33#include <iprt/errcore.h> /* VERR_END_OF_STRING */
[36555]34
[20374]35RT_C_DECLS_BEGIN
[1]36
[31418]37
[57941]38/** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation
39 * @ingroup grp_rt_str
[18424]40 *
[57941]41 * Deals with Latin-1 encoded strings.
[18424]42 *
[57941]43 * @warning Make sure to name all variables dealing with Latin-1 strings
44 * such that there is no way to mistake them for normal UTF-8 strings.
45 * There may be severe security issues resulting from mistaking Latin-1
46 * for UTF-8!
[18424]47 *
[1]48 * @{
49 */
50
51/**
52 * Get the unicode code point at the given string position.
53 *
54 * @returns unicode code point.
55 * @returns RTUNICP_INVALID if the encoding is invalid.
[57941]56 * @param pszLatin1 The Latin-1 string.
[1]57 */
[57941]58DECLINLINE(RTUNICP) RTLatin1GetCp(const char *pszLatin1)
[1]59{
[57941]60 return *(const unsigned char *)pszLatin1;
[1]61}
62
63/**
64 * Get the unicode code point at the given string position.
65 *
66 * @returns iprt status code.
[57941]67 * @param ppszLatin1 Pointer to the string pointer. This will be updated to
68 * point to the char following the current code point. This
69 * is advanced one character forward on failure.
70 * @param pCp Where to store the code point. RTUNICP_INVALID is stored
71 * here on failure.
[1]72 */
[57941]73DECLINLINE(int) RTLatin1GetCpEx(const char **ppszLatin1, PRTUNICP pCp)
[1]74{
[57941]75 const unsigned char uch = **(const unsigned char **)ppszLatin1;
76 (*ppszLatin1)++;
[31246]77 *pCp = uch;
78 return VINF_SUCCESS;
79}
[1]80
[31246]81/**
82 * Get the unicode code point at the given string position for a string of a
83 * given maximum length.
84 *
85 * @returns iprt status code.
86 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
87 *
[57941]88 * @param ppszLatin1 Pointer to the string pointer. This will be updated to
[31246]89 * point to the char following the current code point.
[57941]90 * @param pcchLatin1 Pointer to the maximum string length. This will be
[31246]91 * decremented by the size of the code point found.
92 * @param pCp Where to store the code point.
93 * RTUNICP_INVALID is stored here on failure.
94 */
[57941]95DECLINLINE(int) RTLatin1GetCpNEx(const char **ppszLatin1, size_t *pcchLatin1, PRTUNICP pCp)
[31246]96{
[57941]97 if (RT_LIKELY(*pcchLatin1 != 0))
[31246]98 {
[57941]99 const unsigned char uch = **(const unsigned char **)ppszLatin1;
100 (*ppszLatin1)++;
101 (*pcchLatin1)--;
[31246]102 *pCp = uch;
103 return VINF_SUCCESS;
104 }
105 *pCp = RTUNICP_INVALID;
106 return VERR_END_OF_STRING;
107}
108
109/**
[31418]110 * Get the Latin-1 size in characters of a given Unicode code point.
[31246]111 *
[31418]112 * The code point is expected to be a valid Unicode one, but not necessarily in
113 * the range supported by Latin-1.
114 *
[31246]115 * @returns the size in characters, or zero if there is no Latin-1 encoding
116 */
117DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)
118{
119 if (CodePoint < 0x100)
120 return 1;
121 return 0;
122}
123
124/**
125 * Put the unicode code point at the given string position
126 * and return the pointer to the char following it.
127 *
128 * This function will not consider anything at or following the
129 * buffer area pointed to by psz. It is therefore not suitable for
130 * inserting code points into a string, only appending/overwriting.
131 *
132 * @returns pointer to the char following the written code point.
[57941]133 * @param pszLatin1 The string.
[31246]134 * @param CodePoint The code point to write.
135 * This should not be RTUNICP_INVALID or any other
136 * character out of the Latin-1 range.
137 */
[57941]138DECLINLINE(char *) RTLatin1PutCp(char *pszLatin1, RTUNICP CodePoint)
[31246]139{
[40938]140 AssertReturn(CodePoint < 0x100, NULL);
[57941]141 *pszLatin1++ = (unsigned char)CodePoint;
142 return pszLatin1;
[31246]143}
144
145/**
146 * Skips ahead, past the current code point.
147 *
148 * @returns Pointer to the char after the current code point.
[57941]149 * @param pszLatin1 Pointer to the current code point.
[31246]150 * @remark This will not move the next valid code point, only past the current one.
151 */
[57941]152DECLINLINE(char *) RTLatin1NextCp(const char *pszLatin1)
[31246]153{
[57941]154 pszLatin1++;
155 return (char *)pszLatin1;
[31246]156}
157
158/**
159 * Skips back to the previous code point.
160 *
161 * @returns Pointer to the char before the current code point.
[57941]162 * @returns pszLatin1Start on failure.
163 * @param pszLatin1Start Pointer to the start of the string.
164 * @param pszLatin1 Pointer to the current code point.
[31246]165 */
[57941]166DECLINLINE(char *) RTLatin1PrevCp(const char *pszLatin1Start, const char *pszLatin1)
[31246]167{
[57941]168 if ((uintptr_t)pszLatin1 > (uintptr_t)pszLatin1Start)
[57926]169 {
[57941]170 pszLatin1--;
171 return (char *)pszLatin1;
[57926]172 }
[57941]173 return (char *)pszLatin1Start;
[31246]174}
175
[1]176/**
[57941]177 * Translate a Latin-1 string into UTF-8, allocating the result buffer (default
[31157]178 * tag).
179 *
180 * @returns iprt status code.
[57941]181 * @param pszLatin1 Latin1 string to convert.
[31157]182 * @param ppszString Receives pointer of allocated UTF-8 string on
183 * success, and is always set to NULL on failure.
184 * The returned pointer must be freed using RTStrFree().
185 */
[57941]186#define RTLatin1ToUtf8(pszLatin1, ppszString) RTLatin1ToUtf8Tag((pszLatin1), (ppszString), RTSTR_TAG)
[31157]187
188/**
[57941]189 * Translate a Latin-1 string into UTF-8, allocating the result buffer (custom tag).
[1]190 *
191 * @returns iprt status code.
[57941]192 * @param pszLatin1 Latin-1 string to convert.
[18525]193 * @param ppszString Receives pointer of allocated UTF-8 string on
194 * success, and is always set to NULL on failure.
[1]195 * The returned pointer must be freed using RTStrFree().
[31157]196 * @param pszTag Allocation tag used for statistics and such.
[1]197 */
[57941]198RTDECL(int) RTLatin1ToUtf8Tag(const char *pszLatin1, char **ppszString, const char *pszTag);
[1]199
200/**
[57941]201 * Translates Latin-1 to UTF-8 using buffer provided by the caller or a fittingly
[31157]202 * sized buffer allocated by the function (default tag).
[1]203 *
204 * @returns iprt status code.
[57941]205 * @param pszLatin1 The Latin-1 string to convert.
206 * @param cchLatin1 The number of Latin-1 characters to translate from
207 * pszLatin1. The translation will stop when reaching
208 * cchLatin1 or the terminator ('\\0'). Use RTSTR_MAX
209 * to translate the entire string.
210 * @param ppsz If @a cch is non-zero, this must either be pointing
211 * to a pointer to a buffer of the specified size, or
212 * pointer to a NULL pointer. If *ppsz is NULL or
213 * @a cch is zero a buffer of at least @a cch chars
214 * will be allocated to hold the translated string. If
215 * a buffer was requested it must be freed using
216 * RTStrFree().
[1]217 * @param cch The buffer size in chars (the type). This includes the terminator.
[21741]218 * @param pcch Where to store the length of the translated string,
219 * excluding the terminator. (Optional)
220 *
221 * This may be set under some error conditions,
222 * however, only for VERR_BUFFER_OVERFLOW and
223 * VERR_NO_STR_MEMORY will it contain a valid string
224 * length that can be used to resize the buffer.
[1]225 */
[57941]226#define RTLatin1ToUtf8Ex(pszLatin1, cchLatin1, ppsz, cch, pcch) \
227 RTLatin1ToUtf8ExTag((pszLatin1), (cchLatin1), (ppsz), (cch), (pcch), RTSTR_TAG)
[1]228
[6041]229/**
[57941]230 * Translates Latin1 to UTF-8 using buffer provided by the caller or a fittingly
[31157]231 * sized buffer allocated by the function (custom tag).
232 *
233 * @returns iprt status code.
[57941]234 * @param pszLatin1 The Latin1 string to convert.
235 * @param cchLatin1 The number of Latin1 characters to translate from
236 * pszLatin1. The translation will stop when
237 * reaching cchLatin1 or the terminator ('\\0'). Use
238 * RTSTR_MAX to translate the entire string.
239 * @param ppsz If cch is non-zero, this must either be pointing to
240 * a pointer to a buffer of the specified size, or
241 * pointer to a NULL pointer. If *ppsz is NULL or cch
242 * is zero a buffer of at least cch chars will be
243 * allocated to hold the translated string. If a
244 * buffer was requested it must be freed using
245 * RTStrFree().
246 * @param cch The buffer size in chars (the type). This includes
[21721]247 * the terminator.
[21741]248 * @param pcch Where to store the length of the translated string,
249 * excluding the terminator. (Optional)
250 *
251 * This may be set under some error conditions,
252 * however, only for VERR_BUFFER_OVERFLOW and
253 * VERR_NO_STR_MEMORY will it contain a valid string
254 * length that can be used to resize the buffer.
[31157]255 * @param pszTag Allocation tag used for statistics and such.
256 */
[57941]257RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszLatin1, size_t cchLatin1, char **ppsz, size_t cch, size_t *pcch,
258 const char *pszTag);
[31157]259
260/**
[57941]261 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).
[21714]262 *
[57941]263 * The primary purpose of this function is to help allocate buffers for
264 * RTLatin1ToUtf8() of the correct size. For most other purposes
265 * RTLatin1ToUtf8Ex() should be used.
[21714]266 *
[57941]267 * @returns Number of chars (bytes).
[21714]268 * @returns 0 if the string was incorrectly encoded.
[57941]269 * @param pszLatin1 The Latin-1 string.
[21714]270 */
[57941]271RTDECL(size_t) RTLatin1CalcUtf8Len(const char *pszLatin1);
[21714]272
273/**
[57941]274 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).
[21714]275 *
276 * @returns iprt status code.
[57941]277 * @param pszLatin1 The Latin-1 string.
278 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the
[21721]279 * entire string.
[57941]280 * @param pcch Where to store the string length (in bytes). Optional.
[21714]281 * This is undefined on failure.
282 */
[57941]283RTDECL(int) RTLatin1CalcUtf8LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcch);
[21714]284
285/**
[21722]286 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items.
287 *
288 * @returns Number of RTUTF16 items.
[57941]289 * @param pszLatin1 The Latin-1 string.
[21722]290 */
[57941]291RTDECL(size_t) RTLatin1CalcUtf16Len(const char *pszLatin1);
[21722]292
293/**
294 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items.
295 *
296 * @returns iprt status code.
[57941]297 * @param pszLatin1 The Latin-1 string.
298 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the
[21722]299 * entire string.
300 * @param pcwc Where to store the string length. Optional.
301 * This is undefined on failure.
302 */
[57941]303RTDECL(int) RTLatin1CalcUtf16LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcwc);
[21722]304
305/**
306 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result
[31157]307 * buffer (default tag).
[21722]308 *
309 * @returns iprt status code.
[57941]310 * @param pszLatin1 The Latin-1 string to convert.
[21722]311 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
312 * returned string must be freed using RTUtf16Free().
313 */
[57941]314#define RTLatin1ToUtf16(pszLatin1, ppwszString) RTLatin1ToUtf16Tag((pszLatin1), (ppwszString), RTSTR_TAG)
[21722]315
316/**
[31157]317 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result
318 * buffer (custom tag).
319 *
320 * @returns iprt status code.
[57941]321 * @param pszLatin1 The Latin-1 string to convert.
[31157]322 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
323 * returned string must be freed using RTUtf16Free().
324 * @param pszTag Allocation tag used for statistics and such.
325 */
[57941]326RTDECL(int) RTLatin1ToUtf16Tag(const char *pszLatin1, PRTUTF16 *ppwszString, const char *pszTag);
[31157]327
328/**
[57941]329 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the
[31157]330 * result buffer if requested (default tag).
331 *
332 * @returns iprt status code.
[57941]333 * @param pszLatin1 The Latin-1 string to convert.
334 * @param cchLatin1 The maximum size in chars (the type) to convert. The
335 * conversion stops when it reaches cchLatin1 or the
336 * string terminator ('\\0'). Use RTSTR_MAX to
337 * translate the entire string.
[31157]338 * @param ppwsz If cwc is non-zero, this must either be pointing
339 * to pointer to a buffer of the specified size, or
340 * pointer to a NULL pointer.
341 * If *ppwsz is NULL or cwc is zero a buffer of at
342 * least cwc items will be allocated to hold the
343 * translated string. If a buffer was requested it
344 * must be freed using RTUtf16Free().
345 * @param cwc The buffer size in RTUTF16s. This includes the
346 * terminator.
347 * @param pcwc Where to store the length of the translated string,
348 * excluding the terminator. (Optional)
349 *
350 * This may be set under some error conditions,
351 * however, only for VERR_BUFFER_OVERFLOW and
352 * VERR_NO_STR_MEMORY will it contain a valid string
353 * length that can be used to resize the buffer.
354 */
[57941]355#define RTLatin1ToUtf16Ex(pszLatin1, cchLatin1, ppwsz, cwc, pcwc) \
356 RTLatin1ToUtf16ExTag((pszLatin1), (cchLatin1), (ppwsz), (cwc), (pcwc), RTSTR_TAG)
[31157]357
358/**
[57941]359 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the
[21722]360 * result buffer if requested.
361 *
362 * @returns iprt status code.
[57941]363 * @param pszLatin1 The Latin-1 string to convert.
364 * @param cchLatin1 The maximum size in chars (the type) to convert. The
365 * conversion stops when it reaches cchLatin1 or the
366 * string terminator ('\\0'). Use RTSTR_MAX to
367 * translate the entire string.
[21722]368 * @param ppwsz If cwc is non-zero, this must either be pointing
369 * to pointer to a buffer of the specified size, or
370 * pointer to a NULL pointer.
371 * If *ppwsz is NULL or cwc is zero a buffer of at
372 * least cwc items will be allocated to hold the
373 * translated string. If a buffer was requested it
374 * must be freed using RTUtf16Free().
375 * @param cwc The buffer size in RTUTF16s. This includes the
376 * terminator.
[21741]377 * @param pcwc Where to store the length of the translated string,
378 * excluding the terminator. (Optional)
379 *
380 * This may be set under some error conditions,
381 * however, only for VERR_BUFFER_OVERFLOW and
382 * VERR_NO_STR_MEMORY will it contain a valid string
383 * length that can be used to resize the buffer.
[31157]384 * @param pszTag Allocation tag used for statistics and such.
[21722]385 */
[57941]386RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszLatin1, size_t cchLatin1,
[31157]387 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
[21722]388
389/** @} */
390
[20374]391RT_C_DECLS_END
[1]392
393/** @} */
394
[76585]395#endif /* !IPRT_INCLUDED_latin1_h */
[1]396
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette