VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 43363

Last change on this file since 43363 was 43363, checked in by vboxsync, 12 years ago

Haiku Additions.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.5 KB
Line 
1/* $Id: utf8-posix.cpp 43363 2012-09-20 09:56:07Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include "internal/iprt.h"
33
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39#include <errno.h>
40#include <locale.h>
41
42/* iconv prototype changed with 165+ (thanks to PSARC/2010/160 Bugster 7037400) */
43#if defined(RT_OS_SOLARIS)
44# if !defined(_XPG6)
45# define VBOX_XPG6_TMP_DEF
46# define _XPG6
47# endif
48# if defined(__USE_LEGACY_PROTOTYPES__)
49# define VBOX_LEGACY_PROTO_TMP_DEF
50# undef __USE_LEGACY_PROTOTYPES__
51# endif
52#endif /* RT_OS_SOLARIS */
53
54# include <iconv.h>
55
56#if defined(RT_OS_SOLARIS)
57# if defined(VBOX_XPG6_TMP_DEF)
58# undef _XPG6
59# undef VBOX_XPG6_TMP_DEF
60# endif
61# if defined(VBOX_LEGACY_PROTO_TMP_DEF)
62# define __USE_LEGACY_PROTOTYPES__
63# undef VBOX_LEGACY_PROTO_TMP_DEF
64# endif
65#endif /* RT_OS_SOLARIS */
66
67#include <wctype.h>
68
69#include <langinfo.h>
70
71#include "internal/alignmentchecks.h"
72#include "internal/string.h"
73#ifdef RT_WITH_ICONV_CACHE
74# include "internal/thread.h"
75AssertCompile(sizeof(iconv_t) <= sizeof(void *));
76#endif
77
78
79/**
80 * Gets the codeset of the current locale (LC_CTYPE).
81 *
82 * @returns Pointer to read-only string with the codeset name.
83 */
84DECLHIDDEN(const char *) rtStrGetLocaleCodeset(void)
85{
86 return nl_langinfo(CODESET);
87}
88
89
90#ifdef RT_WITH_ICONV_CACHE
91
92/**
93 * Initializes the iconv handle cache associated with a thread.
94 *
95 * @param pThread The thread in question.
96 */
97DECLHIDDEN(void) rtStrIconvCacheInit(PRTTHREADINT pThread)
98{
99 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
100 pThread->ahIconvs[i] = (iconv_t)-1;
101}
102
103/**
104 * Destroys the iconv handle cache associated with a thread.
105 *
106 * @param pThread The thread in question.
107 */
108DECLHIDDEN(void) rtStrIconvCacheDestroy(PRTTHREADINT pThread)
109{
110 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
111 {
112 iconv_t hIconv = (iconv_t)pThread->ahIconvs[i];
113 pThread->ahIconvs[i] = (iconv_t)-1;
114 if (hIconv != (iconv_t)-1)
115 iconv_close(hIconv);
116 }
117}
118
119
120/**
121 * Converts a string from one charset to another.
122 *
123 * @returns iprt status code.
124 * @param pvInput Pointer to intput string.
125 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
126 * @param pszInputCS Codeset of the input string.
127 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
128 * If cbOutput is 0 this is where the pointer to the allocated
129 * buffer is stored.
130 * @param cbOutput Size of the passed in buffer.
131 * @param pszOutputCS Codeset of the input string.
132 * @param cFactor Input vs. output size factor.
133 * @param phIconv Pointer to the cache entry.
134 */
135static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
136 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
137 unsigned cFactor, iconv_t *phIconv)
138{
139 /*
140 * Allocate buffer
141 */
142 bool fUcs2Term;
143 void *pvOutput;
144 size_t cbOutput2;
145 if (!cbOutput)
146 {
147 cbOutput2 = cbInput * cFactor;
148 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
149 if (!pvOutput)
150 return VERR_NO_TMP_MEMORY;
151 fUcs2Term = true;
152 }
153 else
154 {
155 pvOutput = *ppvOutput;
156 fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
157 || !strcmp(pszOutputCS, "UTF-16")
158 || !strcmp(pszOutputCS, "ucs-2")
159 || !strcmp(pszOutputCS, "utf-16");
160 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
161 if (cbOutput2 > cbOutput)
162 return VERR_BUFFER_OVERFLOW;
163 }
164
165 /*
166 * Use a loop here to retry with bigger buffers.
167 */
168 for (unsigned cTries = 10; cTries > 0; cTries--)
169 {
170 /*
171 * Create conversion object if necessary.
172 */
173 iconv_t hIconv = (iconv_t)*phIconv;
174 if (hIconv == (iconv_t)-1)
175 {
176#ifdef RT_OS_SOLARIS
177 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
178 if (!*pszInputCS)
179 pszInputCS = rtStrGetLocaleCodeset();
180 if (!*pszOutputCS)
181 pszOutputCS = rtStrGetLocaleCodeset();
182#endif
183 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
184 *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
185 IPRT_ALIGNMENT_CHECKS_ENABLE();
186 }
187 if (hIconv != (iconv_t)-1)
188 {
189 /*
190 * Do the conversion.
191 */
192 size_t cbInLeft = cbInput;
193 size_t cbOutLeft = cbOutput2;
194 const void *pvInputLeft = pvInput;
195 void *pvOutputLeft = pvOutput;
196#if defined(RT_OS_LINUX) || defined(RT_OS_HAIKU) || defined(RT_OS_SOLARIS) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
197 if (iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
198#else
199 if (iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
200#endif
201 {
202 if (!cbInLeft)
203 {
204 /*
205 * We're done, just add the terminator and return.
206 * (Two terminators to support UCS-2 output, too.)
207 */
208 ((char *)pvOutputLeft)[0] = '\0';
209 if (fUcs2Term)
210 ((char *)pvOutputLeft)[1] = '\0';
211 *ppvOutput = pvOutput;
212 return VINF_SUCCESS;
213 }
214 errno = E2BIG;
215 }
216
217 /*
218 * If we failed because of output buffer space we'll
219 * increase the output buffer size and retry.
220 */
221 if (errno == E2BIG)
222 {
223 if (!cbOutput)
224 {
225 RTMemTmpFree(pvOutput);
226 cbOutput2 *= 2;
227 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
228 if (!pvOutput)
229 return VERR_NO_TMP_MEMORY;
230 continue;
231 }
232 return VERR_BUFFER_OVERFLOW;
233 }
234
235 /*
236 * Close the handle on all other errors to make sure we won't carry
237 * any bad state with us.
238 */
239 *phIconv = (iconv_t)-1;
240 iconv_close(hIconv);
241 }
242 break;
243 }
244
245 /* failure */
246 if (!cbOutput)
247 RTMemTmpFree(pvOutput);
248 return VERR_NO_TRANSLATION;
249}
250
251#endif /* RT_WITH_ICONV_CACHE */
252
253/**
254 * Converts a string from one charset to another without using the handle cache.
255 *
256 * @returns IPRT status code.
257 *
258 * @param pvInput Pointer to intput string.
259 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
260 * @param pszInputCS Codeset of the input string.
261 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
262 * If cbOutput is 0 this is where the pointer to the allocated
263 * buffer is stored.
264 * @param cbOutput Size of the passed in buffer.
265 * @param pszOutputCS Codeset of the input string.
266 * @param cFactor Input vs. output size factor.
267 */
268static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
269 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
270 unsigned cFactor)
271{
272 /*
273 * Allocate buffer
274 */
275 bool fUcs2Term;
276 void *pvOutput;
277 size_t cbOutput2;
278 if (!cbOutput)
279 {
280 cbOutput2 = cbInput * cFactor;
281 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
282 if (!pvOutput)
283 return VERR_NO_TMP_MEMORY;
284 fUcs2Term = true;
285 }
286 else
287 {
288 pvOutput = *ppvOutput;
289 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
290 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
291 if (cbOutput2 > cbOutput)
292 return VERR_BUFFER_OVERFLOW;
293 }
294
295 /*
296 * Use a loop here to retry with bigger buffers.
297 */
298 for (unsigned cTries = 10; cTries > 0; cTries--)
299 {
300 /*
301 * Create conversion object.
302 */
303#ifdef RT_OS_SOLARIS
304 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
305 if (!*pszInputCS)
306 pszInputCS = rtStrGetLocaleCodeset();
307 if (!*pszOutputCS)
308 pszOutputCS = rtStrGetLocaleCodeset();
309#endif
310 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
311 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
312 IPRT_ALIGNMENT_CHECKS_ENABLE();
313 if (icHandle != (iconv_t)-1)
314 {
315 /*
316 * Do the conversion.
317 */
318 size_t cbInLeft = cbInput;
319 size_t cbOutLeft = cbOutput2;
320 const void *pvInputLeft = pvInput;
321 void *pvOutputLeft = pvOutput;
322#if defined(RT_OS_LINUX) || defined(RT_OS_HAIKU) || defined(RT_OS_SOLARIS) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
323 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
324#else
325 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
326#endif
327 {
328 if (!cbInLeft)
329 {
330 /*
331 * We're done, just add the terminator and return.
332 * (Two terminators to support UCS-2 output, too.)
333 */
334 iconv_close(icHandle);
335 ((char *)pvOutputLeft)[0] = '\0';
336 if (fUcs2Term)
337 ((char *)pvOutputLeft)[1] = '\0';
338 *ppvOutput = pvOutput;
339 return VINF_SUCCESS;
340 }
341 errno = E2BIG;
342 }
343 iconv_close(icHandle);
344
345 /*
346 * If we failed because of output buffer space we'll
347 * increase the output buffer size and retry.
348 */
349 if (errno == E2BIG)
350 {
351 if (!cbOutput)
352 {
353 RTMemTmpFree(pvOutput);
354 cbOutput2 *= 2;
355 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
356 if (!pvOutput)
357 return VERR_NO_TMP_MEMORY;
358 continue;
359 }
360 return VERR_BUFFER_OVERFLOW;
361 }
362 }
363 break;
364 }
365
366 /* failure */
367 if (!cbOutput)
368 RTMemTmpFree(pvOutput);
369 return VERR_NO_TRANSLATION;
370}
371
372
373/**
374 * Wrapper that selects rtStrConvertCached or rtStrConvertUncached.
375 *
376 * @returns IPRT status code.
377 *
378 * @param pszInput Pointer to intput string.
379 * @param cchInput Size (in bytes) of input string. Excludes any
380 * terminators.
381 * @param pszInputCS Codeset of the input string.
382 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
383 * If cbOutput is 0 this is where the pointer to the
384 * allocated buffer is stored.
385 * @param cbOutput Size of the passed in buffer.
386 * @param pszOutputCS Codeset of the input string.
387 * @param cFactor Input vs. output size factor.
388 * @param enmCacheIdx The iconv cache index.
389 */
390DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
391 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
392 unsigned cFactor, RTSTRICONV enmCacheIdx)
393{
394#ifdef RT_WITH_ICONV_CACHE
395 RTTHREAD hSelf = RTThreadSelf();
396 if (hSelf != NIL_RTTHREAD)
397 {
398 PRTTHREADINT pThread = rtThreadGet(hSelf);
399 if (pThread)
400 {
401 if ((pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN)) != RTTHREADINT_FLAGS_ALIEN)
402 {
403 int rc = rtstrConvertCached(pchInput, cchInput, pszInputCS,
404 (void **)ppszOutput, cbOutput, pszOutputCS,
405 cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
406 rtThreadRelease(pThread);
407 return rc;
408 }
409 rtThreadRelease(pThread);
410 }
411 }
412#endif
413 return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
414 (void **)ppszOutput, cbOutput, pszOutputCS,
415 cFactor);
416}
417
418
419/**
420 * Internal API for use by the path conversion code.
421 *
422 * @returns IPRT status code.
423 *
424 * @param pszInput Pointer to intput string.
425 * @param cchInput Size (in bytes) of input string. Excludes any
426 * terminators.
427 * @param pszInputCS Codeset of the input string.
428 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
429 * If cbOutput is 0 this is where the pointer to the
430 * allocated buffer is stored.
431 * @param cbOutput Size of the passed in buffer.
432 * @param pszOutputCS Codeset of the input string.
433 * @param cFactor Input vs. output size factor.
434 * @param enmCacheIdx The iconv cache index.
435 */
436DECLHIDDEN(int) rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
437 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
438 unsigned cFactor, RTSTRICONV enmCacheIdx)
439{
440 Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
441 return rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
442 ppszOutput, cbOutput, pszOutputCS,
443 cFactor, enmCacheIdx);
444}
445
446
447RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag)
448{
449 Assert(ppszString);
450 Assert(pszString);
451 *ppszString = NULL;
452
453 /*
454 * Assume result string length is not longer than UTF-8 string.
455 */
456 size_t cch = strlen(pszString);
457 if (cch <= 0)
458 {
459 /* zero length string passed. */
460 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
461 if (*ppszString)
462 return VINF_SUCCESS;
463 return VERR_NO_TMP_MEMORY;
464 }
465 return rtStrConvertWrapper(pszString, cch, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
466}
467
468
469RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag)
470{
471 Assert(ppszString);
472 Assert(pszString);
473 *ppszString = NULL;
474
475 /*
476 * Attempt with UTF-8 length of 2x the native length.
477 */
478 size_t cch = strlen(pszString);
479 if (cch <= 0)
480 {
481 /* zero length string passed. */
482 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
483 if (*ppszString)
484 return VINF_SUCCESS;
485 return VERR_NO_TMP_MEMORY;
486 }
487 return rtStrConvertWrapper(pszString, cch, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
488}
489
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette