VirtualBox

source: vbox/trunk/src/VBox/Runtime/testcase/tstUtf8.cpp@ 100765

Last change on this file since 100765 was 99775, checked in by vboxsync, 20 months ago

*: Mark functions as static if not used outside of a given compilation unit. Enables the compiler to optimize inlining, reduces the symbol tables, exposes unused functions and in some rare cases exposes mismatches between function declarations and definitions, but most importantly reduces the number of parfait reports for the extern-function-no-forward-declaration category. This should not result in any functional changes, bugref:3409

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 58.9 KB
Line 
1/* $Id: tstUtf8.cpp 99775 2023-05-12 12:21:58Z vboxsync $ */
2/** @file
3 * IPRT Testcase - UTF-8 and UTF-16 string conversions.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <iprt/string.h>
42#include <iprt/latin1.h>
43#include <iprt/utf16.h>
44
45#include <iprt/alloc.h>
46#include <iprt/assert.h>
47#include <iprt/env.h>
48#include <iprt/err.h>
49#include <iprt/rand.h>
50#include <iprt/stream.h>
51#include <iprt/test.h>
52#include <iprt/time.h>
53#include <iprt/uni.h>
54#include <iprt/uuid.h>
55
56#ifdef RT_OS_WINDOWS
57# include <iprt/win/windows.h> /* For GetACP(). */
58#endif
59
60
61/**
62 * Generate a random codepoint for simple UTF-16 encoding.
63 */
64static RTUTF16 GetRandUtf16(void)
65{
66 RTUTF16 wc;
67 do
68 {
69 wc = (RTUTF16)RTRandU32Ex(1, 0xfffd);
70 } while (wc >= 0xd800 && wc <= 0xdfff);
71 return wc;
72}
73
74
75/**
76 *
77 */
78static void test1(RTTEST hTest)
79{
80 static const char s_szBadString1[] = "Bad \xe0\x13\x0";
81 static const char s_szBadString2[] = "Bad \xef\xbf\xc3";
82 int rc;
83 char *pszUtf8;
84 char *pszCurrent;
85 PRTUTF16 pwsz;
86 PRTUTF16 pwszRand;
87
88 /*
89 * Invalid UTF-8 to UCS-2 test.
90 */
91 RTTestSub(hTest, "Feeding bad UTF-8 to RTStrToUtf16");
92 rc = RTStrToUtf16(s_szBadString1, &pwsz);
93 RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION || rc == VERR_INVALID_UTF8_ENCODING,
94 (hTest, "Conversion of first bad UTF-8 string to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
95 rc = RTStrToUtf16(s_szBadString2, &pwsz);
96 RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION || rc == VERR_INVALID_UTF8_ENCODING,
97 (hTest, "Conversion of second bad UTF-8 strings to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
98
99 /*
100 * Test current CP conversion.
101 */
102 RTTestSub(hTest, "Rand UTF-16 -> UTF-8 -> CP -> UTF-8");
103 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
104 for (int i = 0; i < 30; i++)
105 pwszRand[i] = GetRandUtf16();
106 pwszRand[30] = 0;
107
108 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
109 if (rc == VINF_SUCCESS)
110 {
111 rc = RTStrUtf8ToCurrentCP(&pszCurrent, pszUtf8);
112 if (rc == VINF_SUCCESS)
113 {
114 RTStrFree(pszUtf8);
115 rc = RTStrCurrentCPToUtf8(&pszUtf8, pszCurrent);
116 if (rc == VINF_SUCCESS)
117 RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> Current -> UTF-8 successful.\n");
118 else
119 RTTestFailed(hTest, "%d: The third part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
120 __LINE__, rc);
121 if (RT_SUCCESS(rc))
122 RTStrFree(pszUtf8);
123 RTStrFree(pszCurrent);
124 }
125 else
126 {
127 if (rc == VERR_NO_TRANSLATION)
128 RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VERR_NO_TRANSLATION. This is probably as it should be.\n");
129 else if (rc == VWRN_NO_TRANSLATION)
130 RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VWRN_NO_TRANSLATION. This is probably as it should be.\n");
131 else
132 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
133 __LINE__, rc);
134 if (RT_SUCCESS(rc))
135 RTStrFree(pszCurrent);
136 RTStrFree(pszUtf8);
137 }
138 }
139 else
140 RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
141 __LINE__, rc);
142 RTMemFree(pwszRand);
143
144 /*
145 * Generate a new random string.
146 */
147 RTTestSub(hTest, "Random UTF-16 -> UTF-8 -> UTF-16");
148 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
149 for (int i = 0; i < 30; i++)
150 pwszRand[i] = GetRandUtf16();
151 pwszRand[30] = 0;
152 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
153 if (rc == VINF_SUCCESS)
154 {
155 rc = RTStrToUtf16(pszUtf8, &pwsz);
156 if (rc == VINF_SUCCESS)
157 {
158 int i;
159 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
160 /* nothing */;
161 if (pwszRand[i] == pwsz[i] && pwsz[i] == 0)
162 RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> UTF-16 successful.\n");
163 else
164 {
165 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed.", __LINE__);
166 RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
167 }
168 RTUtf16Free(pwsz);
169 }
170 else
171 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
172 __LINE__, rc);
173 RTStrFree(pszUtf8);
174 }
175 else
176 RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
177 __LINE__, rc);
178 RTMemFree(pwszRand);
179
180 /*
181 * Generate yet another random string and convert it to a buffer.
182 */
183 RTTestSub(hTest, "Random RTUtf16ToUtf8Ex + RTStrToUtf16");
184 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
185 for (int i = 0; i < 30; i++)
186 pwszRand[i] = GetRandUtf16();
187 pwszRand[30] = 0;
188
189 char szUtf8Array[120];
190 char *pszUtf8Array = szUtf8Array;
191 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 120, NULL);
192 if (rc == 0)
193 {
194 rc = RTStrToUtf16(pszUtf8Array, &pwsz);
195 if (rc == 0)
196 {
197 int i;
198 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
199 ;
200 if (pwsz[i] == 0 && i >= 8)
201 RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 -> UTF-16 successful.\n");
202 else
203 {
204 RTTestFailed(hTest, "%d: Incorrect conversion of UTF-16 -> fixed length UTF-8 -> UTF-16.\n", __LINE__);
205 RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
206 }
207 RTUtf16Free(pwsz);
208 }
209 else
210 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
211 }
212 else
213 RTTestFailed(hTest, "%d: The first part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
214 RTMemFree(pwszRand);
215
216 /*
217 * And again.
218 */
219 RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
220 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
221 for (int i = 0; i < 30; i++)
222 pwszRand[i] = GetRandUtf16();
223 pwszRand[30] = 0;
224
225 RTUTF16 wszBuf[70];
226 PRTUTF16 pwsz2Buf = wszBuf;
227 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
228 if (rc == 0)
229 {
230 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 70, NULL);
231 if (rc == 0)
232 {
233 int i;
234 for (i = 0; pwszRand[i] == pwsz2Buf[i] && pwsz2Buf[i] != 0; i++)
235 ;
236 if (pwszRand[i] == 0 && pwsz2Buf[i] == 0)
237 RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 successful.\n");
238 else
239 {
240 RTTestFailed(hTest, "%d: Incorrect conversion of random UTF-16 -> UTF-8 -> fixed length UTF-16.\n", __LINE__);
241 RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz2Buf[i]);
242 }
243 }
244 else
245 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
246 RTStrFree(pszUtf8);
247 }
248 else
249 RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
250 __LINE__, rc);
251 RTMemFree(pwszRand);
252
253 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
254 for (int i = 0; i < 30; i++)
255 pwszRand[i] = GetRandUtf16();
256 pwszRand[30] = 0;
257
258 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 20, NULL);
259 if (rc == VERR_BUFFER_OVERFLOW)
260 RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 with too short buffer successfully rejected.\n");
261 else
262 RTTestFailed(hTest, "%d: Random UTF-16 -> fixed length UTF-8 with too small buffer returned value %d instead of VERR_BUFFER_OVERFLOW.\n",
263 __LINE__, rc);
264 RTMemFree(pwszRand);
265
266 /*
267 * last time...
268 */
269 RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
270 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
271 for (int i = 0; i < 30; i++)
272 pwszRand[i] = GetRandUtf16();
273 pwszRand[30] = 0;
274
275 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
276 if (rc == VINF_SUCCESS)
277 {
278 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 20, NULL);
279 if (rc == VERR_BUFFER_OVERFLOW)
280 RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer successfully rejected.\n");
281 else
282 RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer returned value %Rrc instead of VERR_BUFFER_OVERFLOW.\n",
283 __LINE__, rc);
284 RTStrFree(pszUtf8);
285 }
286 else
287 RTTestFailed(hTest, "%d:The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
288 __LINE__, rc);
289 RTMemFree(pwszRand);
290
291 RTTestSubDone(hTest);
292}
293
294
295static RTUNICP g_uszAll[0x110000 - 1 - 0x800 - 2 + 1];
296static RTUTF16 g_wszAll[0xfffe - (0xe000 - 0xd800) + (0x110000 - 0x10000) * 2];
297static char g_szAll[0x7f + (0x800 - 0x80) * 2 + (0xfffe - 0x800 - (0xe000 - 0xd800))* 3 + (0x110000 - 0x10000) * 4 + 1];
298
299static void whereami(int cBits, size_t off)
300{
301 if (cBits == 8)
302 {
303 if (off < 0x7f)
304 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", off + 1);
305 else if (off < 0xf7f)
306 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x7f) / 2 + 0x80);
307 else if (off < 0x27f7f)
308 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0xf7f) / 3 + 0x800);
309 else if (off < 0x2df79)
310 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x27f7f) / 3 + 0xe000);
311 else if (off < 0x42df79)
312 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x2df79) / 4 + 0x10000);
313 else
314 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 ???\n");
315 }
316 else if (cBits == 16)
317 {
318 if (off < 0xd7ff*2)
319 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", off / 2 + 1);
320 else if (off < 0xf7fd*2)
321 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xd7ff*2) / 2 + 0xe000);
322 else if (off < 0x20f7fd)
323 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xf7fd*2) / 4 + 0x10000);
324 else
325 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 ???\n");
326 }
327 else
328 {
329 if (off < (0xd800 - 1) * sizeof(RTUNICP))
330 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 1);
331 else if (off < (0xfffe - 0x800 - 1) * sizeof(RTUNICP))
332 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1);
333 else
334 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1 + 2);
335 }
336}
337
338static int mymemcmp(const void *pv1, const void *pv2, size_t cb, int cBits)
339{
340 const uint8_t *pb1 = (const uint8_t *)pv1;
341 const uint8_t *pb2 = (const uint8_t *)pv2;
342 for (size_t off = 0; off < cb; off++)
343 {
344 if (pb1[off] != pb2[off])
345 {
346 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "mismatch at %#x: ", off);
347 whereami(cBits, off);
348 if (off > 0)
349 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off-1, pb1[off-1], pb2[off-1]);
350 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "*%#x: %02x != %02x!\n", off, pb1[off], pb2[off]);
351 for (size_t i = 1; i < 10; i++)
352 if (off + i < cb)
353 RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off+i, pb1[off+i], pb2[off+i]);
354 return 1;
355 }
356 }
357 return 0;
358}
359
360
361static void InitStrings()
362{
363 /*
364 * Generate unicode string containing all the legal UTF-16 codepoints, both UTF-16 and UTF-8 version.
365 */
366 /* the simple code point array first */
367 unsigned i = 0;
368 RTUNICP uc = 1;
369 while (uc < 0xd800)
370 g_uszAll[i++] = uc++;
371 uc = 0xe000;
372 while (uc < 0xfffe)
373 g_uszAll[i++] = uc++;
374 uc = 0x10000;
375 while (uc < 0x110000)
376 g_uszAll[i++] = uc++;
377 g_uszAll[i++] = 0;
378 Assert(RT_ELEMENTS(g_uszAll) == i);
379
380 /* the utf-16 one */
381 i = 0;
382 uc = 1;
383 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
384 while (uc < 0xd800)
385 g_wszAll[i++] = uc++;
386 uc = 0xe000;
387 //RTPrintf(" %#x=%#x", i, uc);
388 while (uc < 0xfffe)
389 g_wszAll[i++] = uc++;
390 uc = 0x10000;
391 //RTPrintf(" %#x=%#x", i, uc);
392 while (uc < 0x110000)
393 {
394 g_wszAll[i++] = 0xd800 | ((uc - 0x10000) >> 10);
395 g_wszAll[i++] = 0xdc00 | ((uc - 0x10000) & 0x3ff);
396 uc++;
397 }
398 //RTPrintf(" %#x=%#x\n", i, uc);
399 g_wszAll[i++] = '\0';
400 Assert(RT_ELEMENTS(g_wszAll) == i);
401
402 /*
403 * The utf-8 one
404 */
405 i = 0;
406 uc = 1;
407 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
408 while (uc < 0x80)
409 g_szAll[i++] = uc++;
410 //RTPrintf(" %#x=%#x", i, uc);
411 while (uc < 0x800)
412 {
413 g_szAll[i++] = 0xc0 | (uc >> 6);
414 g_szAll[i++] = 0x80 | (uc & 0x3f);
415 Assert(!((uc >> 6) & ~0x1f));
416 uc++;
417 }
418 //RTPrintf(" %#x=%#x", i, uc);
419 while (uc < 0xd800)
420 {
421 g_szAll[i++] = 0xe0 | (uc >> 12);
422 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
423 g_szAll[i++] = 0x80 | (uc & 0x3f);
424 Assert(!((uc >> 12) & ~0xf));
425 uc++;
426 }
427 uc = 0xe000;
428 //RTPrintf(" %#x=%#x", i, uc);
429 while (uc < 0xfffe)
430 {
431 g_szAll[i++] = 0xe0 | (uc >> 12);
432 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
433 g_szAll[i++] = 0x80 | (uc & 0x3f);
434 Assert(!((uc >> 12) & ~0xf));
435 uc++;
436 }
437 uc = 0x10000;
438 //RTPrintf(" %#x=%#x", i, uc);
439 while (uc < 0x110000)
440 {
441 g_szAll[i++] = 0xf0 | (uc >> 18);
442 g_szAll[i++] = 0x80 | ((uc >> 12) & 0x3f);
443 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
444 g_szAll[i++] = 0x80 | (uc & 0x3f);
445 Assert(!((uc >> 18) & ~0x7));
446 uc++;
447 }
448 //RTPrintf(" %#x=%#x\n", i, uc);
449 g_szAll[i++] = '\0';
450 Assert(RT_ELEMENTS(g_szAll) == i);
451}
452
453
454static void test2(RTTEST hTest)
455{
456 /*
457 * Convert to UTF-8 and back.
458 */
459 RTTestSub(hTest, "UTF-16 -> UTF-8 -> UTF-16");
460 char *pszUtf8;
461 int rc = RTUtf16ToUtf8(&g_wszAll[0], &pszUtf8);
462 if (rc == VINF_SUCCESS)
463 {
464 pszUtf8[0] = 1;
465 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
466 RTTestFailed(hTest, "UTF-16 -> UTF-8 mismatch!");
467
468 PRTUTF16 pwszUtf16;
469 rc = RTStrToUtf16(pszUtf8, &pwszUtf16);
470 if (rc == VINF_SUCCESS)
471 {
472 if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
473 RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
474 RTUtf16Free(pwszUtf16);
475 }
476 else
477 RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
478 RTStrFree(pszUtf8);
479 }
480 else
481 RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
482
483
484 /*
485 * Convert to UTF-16 and back. (just in case the above test fails)
486 */
487 RTTestSub(hTest, "UTF-8 -> UTF-16 -> UTF-8");
488 PRTUTF16 pwszUtf16;
489 rc = RTStrToUtf16(&g_szAll[0], &pwszUtf16);
490 if (rc == VINF_SUCCESS)
491 {
492 if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
493 RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
494
495 rc = RTUtf16ToUtf8(pwszUtf16, &pszUtf8);
496 if (rc == VINF_SUCCESS)
497 {
498 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
499 RTTestFailed(hTest, "UTF-16 -> UTF-8 failed compare!");
500 RTStrFree(pszUtf8);
501 }
502 else
503 RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
504 RTUtf16Free(pwszUtf16);
505 }
506 else
507 RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
508
509 /*
510 * Convert UTF-8 to CPs.
511 */
512 RTTestSub(hTest, "UTF-8 -> UNI -> UTF-8");
513 PRTUNICP paCps;
514 rc = RTStrToUni(g_szAll, &paCps);
515 if (rc == VINF_SUCCESS)
516 {
517 if (mymemcmp(paCps, g_uszAll, sizeof(g_uszAll), 32))
518 RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
519
520 size_t cCps;
521 rc = RTStrToUniEx(g_szAll, RTSTR_MAX, &paCps, RT_ELEMENTS(g_uszAll), &cCps);
522 if (rc == VINF_SUCCESS)
523 {
524 if (cCps != RT_ELEMENTS(g_uszAll) - 1)
525 RTTestFailed(hTest, "wrong Code Point count %zu, expected %zu\n", cCps, RT_ELEMENTS(g_uszAll) - 1);
526 }
527 else
528 RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
529
530 /** @todo RTCpsToUtf8 or something. */
531 RTUniFree(paCps);
532 }
533 else
534 RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
535
536 /*
537 * Check the various string lengths.
538 */
539 RTTestSub(hTest, "Lengths");
540 size_t cuc1 = RTStrCalcUtf16Len(g_szAll);
541 size_t cuc2 = RTUtf16Len(g_wszAll);
542 if (cuc1 != cuc2)
543 RTTestFailed(hTest, "cuc1=%zu != cuc2=%zu\n", cuc1, cuc2);
544 //size_t cuc3 = RTUniLen(g_uszAll);
545
546
547 /*
548 * Enumerate the strings.
549 */
550 RTTestSub(hTest, "Code Point Getters and Putters");
551 char *pszPut1Base = (char *)RTMemAlloc(sizeof(g_szAll));
552 AssertRelease(pszPut1Base);
553 char *pszPut1 = pszPut1Base;
554 PRTUTF16 pwszPut2Base = (PRTUTF16)RTMemAlloc(sizeof(g_wszAll));
555 AssertRelease(pwszPut2Base);
556 PRTUTF16 pwszPut2 = pwszPut2Base;
557 const char *psz1 = g_szAll;
558 const char *psz2 = g_szAll;
559 PCRTUTF16 pwsz3 = g_wszAll;
560 PCRTUTF16 pwsz4 = g_wszAll;
561 for (;;)
562 {
563 /*
564 * getters
565 */
566 RTUNICP uc1;
567 rc = RTStrGetCpEx(&psz1, &uc1);
568 if (RT_FAILURE(rc))
569 {
570 RTTestFailed(hTest, "RTStrGetCpEx failed with rc=%Rrc at %.10Rhxs", rc, psz2);
571 whereami(8, psz2 - &g_szAll[0]);
572 break;
573 }
574 char *pszPrev1 = RTStrPrevCp(g_szAll, psz1);
575 if (pszPrev1 != psz2)
576 {
577 RTTestFailed(hTest, "RTStrPrevCp returned %p expected %p!", pszPrev1, psz2);
578 whereami(8, psz2 - &g_szAll[0]);
579 break;
580 }
581 RTUNICP uc2 = RTStrGetCp(psz2);
582 if (uc2 != uc1)
583 {
584 RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc2, uc1);
585 whereami(8, psz2 - &g_szAll[0]);
586 break;
587 }
588 psz2 = RTStrNextCp(psz2);
589 if (psz2 != psz1)
590 {
591 RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetNext returned different next pointer!");
592 whereami(8, psz2 - &g_szAll[0]);
593 break;
594 }
595
596 RTUNICP uc3;
597 rc = RTUtf16GetCpEx(&pwsz3, &uc3);
598 if (RT_FAILURE(rc))
599 {
600 RTTestFailed(hTest, "RTUtf16GetCpEx failed with rc=%Rrc at %.10Rhxs", rc, pwsz4);
601 whereami(16, pwsz4 - &g_wszAll[0]);
602 break;
603 }
604 if (uc3 != uc2)
605 {
606 RTTestFailed(hTest, "RTUtf16GetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc2);
607 whereami(16, pwsz4 - &g_wszAll[0]);
608 break;
609 }
610 RTUNICP uc4 = RTUtf16GetCp(pwsz4);
611 if (uc3 != uc4)
612 {
613 RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc4);
614 whereami(16, pwsz4 - &g_wszAll[0]);
615 break;
616 }
617 pwsz4 = RTUtf16NextCp(pwsz4);
618 if (pwsz4 != pwsz3)
619 {
620 RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetNext returned different next pointer!");
621 whereami(8, pwsz4 - &g_wszAll[0]);
622 break;
623 }
624
625
626 /*
627 * putters
628 */
629 pszPut1 = RTStrPutCp(pszPut1, uc1);
630 if (pszPut1 - pszPut1Base != psz1 - &g_szAll[0])
631 {
632 RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
633 pszPut1 - pszPut1Base, psz1 - &g_szAll[0]);
634 whereami(8, psz2 - &g_szAll[0]);
635 break;
636 }
637
638 pwszPut2 = RTUtf16PutCp(pwszPut2, uc3);
639 if (pwszPut2 - pwszPut2Base != pwsz3 - &g_wszAll[0])
640 {
641 RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
642 pwszPut2 - pwszPut2Base, pwsz3 - &g_wszAll[0]);
643 whereami(8, pwsz4 - &g_wszAll[0]);
644 break;
645 }
646
647
648 /* the end? */
649 if (!uc1)
650 break;
651 }
652
653 /* check output if we seems to have made it thru it all. */
654 if (psz2 == &g_szAll[sizeof(g_szAll)])
655 {
656 if (mymemcmp(pszPut1Base, g_szAll, sizeof(g_szAll), 8))
657 RTTestFailed(hTest, "RTStrPutCp encoded the string incorrectly.");
658 if (mymemcmp(pwszPut2Base, g_wszAll, sizeof(g_wszAll), 16))
659 RTTestFailed(hTest, "RTUtf16PutCp encoded the string incorrectly.");
660 }
661
662 RTMemFree(pszPut1Base);
663 RTMemFree(pwszPut2Base);
664
665 RTTestSubDone(hTest);
666}
667
668
669/**
670 * Check case insensitivity.
671 */
672static void test3(RTTEST hTest)
673{
674 RTTestSub(hTest, "Case Sensitivity");
675
676 if ( RTUniCpToLower('a') != 'a'
677 || RTUniCpToLower('A') != 'a'
678 || RTUniCpToLower('b') != 'b'
679 || RTUniCpToLower('B') != 'b'
680 || RTUniCpToLower('Z') != 'z'
681 || RTUniCpToLower('z') != 'z'
682 || RTUniCpToUpper('c') != 'C'
683 || RTUniCpToUpper('C') != 'C'
684 || RTUniCpToUpper('z') != 'Z'
685 || RTUniCpToUpper('Z') != 'Z')
686 RTTestFailed(hTest, "RTUniToUpper/Lower failed basic tests.\n");
687
688 if (RTUtf16ICmp(g_wszAll, g_wszAll))
689 RTTestFailed(hTest, "RTUtf16ICmp failed the basic test.\n");
690
691 if (RTUtf16Cmp(g_wszAll, g_wszAll))
692 RTTestFailed(hTest, "RTUtf16Cmp failed the basic test.\n");
693
694 static RTUTF16 s_wszTst1a[] = { 'a', 'B', 'c', 'D', 'E', 'f', 'g', 'h', 'i', 'j', 'K', 'L', 'm', 'N', 'o', 'P', 'q', 'r', 'S', 't', 'u', 'V', 'w', 'x', 'Y', 'Z', 0xc5, 0xc6, 0xf8, 0 };
695 static RTUTF16 s_wszTst1b[] = { 'A', 'B', 'c', 'd', 'e', 'F', 'G', 'h', 'i', 'J', 'k', 'l', 'M', 'n', 'O', 'p', 'Q', 'R', 's', 't', 'U', 'v', 'w', 'X', 'y', 'z', 0xe5, 0xe6, 0xd8, 0 };
696 if ( RTUtf16ICmp(s_wszTst1b, s_wszTst1b)
697 || RTUtf16ICmp(s_wszTst1a, s_wszTst1a)
698 || RTUtf16ICmp(s_wszTst1a, s_wszTst1b)
699 || RTUtf16ICmp(s_wszTst1b, s_wszTst1a)
700 )
701 RTTestFailed(hTest, "RTUtf16ICmp failed the alphabet test.\n");
702
703 if ( RTUtf16Cmp(s_wszTst1b, s_wszTst1b)
704 || RTUtf16Cmp(s_wszTst1a, s_wszTst1a)
705 || !RTUtf16Cmp(s_wszTst1a, s_wszTst1b)
706 || !RTUtf16Cmp(s_wszTst1b, s_wszTst1a)
707 )
708 RTTestFailed(hTest, "RTUtf16Cmp failed the alphabet test.\n");
709
710 RTTestSubDone(hTest);
711}
712
713
714/**
715 * Test the RTStr*Cmp functions.
716 */
717static void TstRTStrXCmp(RTTEST hTest)
718{
719#define CHECK_DIFF(expr, op) \
720 do \
721 { \
722 int iDiff = expr; \
723 if (!(iDiff op 0)) \
724 RTTestFailed(hTest, "%d: %d " #op " 0: %s\n", __LINE__, iDiff, #expr); \
725 } while (0)
726
727/** @todo test the non-ascii bits. */
728
729 RTTestSub(hTest, "RTStrCmp");
730 CHECK_DIFF(RTStrCmp(NULL, NULL), == );
731 CHECK_DIFF(RTStrCmp(NULL, ""), < );
732 CHECK_DIFF(RTStrCmp("", NULL), > );
733 CHECK_DIFF(RTStrCmp("", ""), == );
734 CHECK_DIFF(RTStrCmp("abcdef", "abcdef"), == );
735 CHECK_DIFF(RTStrCmp("abcdef", "abcde"), > );
736 CHECK_DIFF(RTStrCmp("abcde", "abcdef"), < );
737 CHECK_DIFF(RTStrCmp("abcdeg", "abcdef"), > );
738 CHECK_DIFF(RTStrCmp("abcdef", "abcdeg"), < );
739 CHECK_DIFF(RTStrCmp("abcdeF", "abcdef"), < );
740 CHECK_DIFF(RTStrCmp("abcdef", "abcdeF"), > );
741
742
743 RTTestSub(hTest, "RTStrNCmp");
744 CHECK_DIFF(RTStrNCmp(NULL, NULL, RTSTR_MAX), == );
745 CHECK_DIFF(RTStrNCmp(NULL, "", RTSTR_MAX), < );
746 CHECK_DIFF(RTStrNCmp("", NULL, RTSTR_MAX), > );
747 CHECK_DIFF(RTStrNCmp("", "", RTSTR_MAX), == );
748 CHECK_DIFF(RTStrNCmp("abcdef", "abcdef", RTSTR_MAX), == );
749 CHECK_DIFF(RTStrNCmp("abcdef", "abcde", RTSTR_MAX), > );
750 CHECK_DIFF(RTStrNCmp("abcde", "abcdef", RTSTR_MAX), < );
751 CHECK_DIFF(RTStrNCmp("abcdeg", "abcdef", RTSTR_MAX), > );
752 CHECK_DIFF(RTStrNCmp("abcdef", "abcdeg", RTSTR_MAX), < );
753 CHECK_DIFF(RTStrNCmp("abcdeF", "abcdef", RTSTR_MAX), < );
754 CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", RTSTR_MAX), > );
755
756 CHECK_DIFF(RTStrNCmp("abcdef", "fedcba", 0), ==);
757 CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 5), ==);
758 CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 6), > );
759
760
761 RTTestSub(hTest, "RTStrICmp");
762 CHECK_DIFF(RTStrICmp(NULL, NULL), == );
763 CHECK_DIFF(RTStrICmp(NULL, ""), < );
764 CHECK_DIFF(RTStrICmp("", NULL), > );
765 CHECK_DIFF(RTStrICmp("", ""), == );
766 CHECK_DIFF(RTStrICmp("abcdef", "abcdef"), == );
767 CHECK_DIFF(RTStrICmp("abcdef", "abcde"), > );
768 CHECK_DIFF(RTStrICmp("abcde", "abcdef"), < );
769 CHECK_DIFF(RTStrICmp("abcdeg", "abcdef"), > );
770 CHECK_DIFF(RTStrICmp("abcdef", "abcdeg"), < );
771
772 CHECK_DIFF(RTStrICmp("abcdeF", "abcdef"), ==);
773 CHECK_DIFF(RTStrICmp("abcdef", "abcdeF"), ==);
774 CHECK_DIFF(RTStrICmp("ABCDEF", "abcdef"), ==);
775 CHECK_DIFF(RTStrICmp("abcdef", "ABCDEF"), ==);
776 CHECK_DIFF(RTStrICmp("AbCdEf", "aBcDeF"), ==);
777 CHECK_DIFF(RTStrICmp("AbCdEg", "aBcDeF"), > );
778 CHECK_DIFF(RTStrICmp("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */
779
780
781 RTTestSub(hTest, "RTStrICmpAscii");
782 CHECK_DIFF(RTStrICmpAscii(NULL, NULL), == );
783 CHECK_DIFF(RTStrICmpAscii(NULL, ""), < );
784 CHECK_DIFF(RTStrICmpAscii("", NULL), > );
785 CHECK_DIFF(RTStrICmpAscii("", ""), == );
786 CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdef"), == );
787 CHECK_DIFF(RTStrICmpAscii("abcdef", "abcde"), > );
788 CHECK_DIFF(RTStrICmpAscii("abcde", "abcdef"), < );
789 CHECK_DIFF(RTStrICmpAscii("abcdeg", "abcdef"), > );
790 CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeg"), < );
791
792 CHECK_DIFF(RTStrICmpAscii("abcdeF", "abcdef"), ==);
793 CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeF"), ==);
794 CHECK_DIFF(RTStrICmpAscii("ABCDEF", "abcdef"), ==);
795 CHECK_DIFF(RTStrICmpAscii("abcdef", "ABCDEF"), ==);
796 CHECK_DIFF(RTStrICmpAscii("AbCdEf", "aBcDeF"), ==);
797 CHECK_DIFF(RTStrICmpAscii("AbCdEg", "aBcDeF"), > );
798 CHECK_DIFF(RTStrICmpAscii("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */
799
800
801 RTTestSub(hTest, "RTStrNICmp");
802 CHECK_DIFF(RTStrNICmp(NULL, NULL, RTSTR_MAX), == );
803 CHECK_DIFF(RTStrNICmp(NULL, "", RTSTR_MAX), < );
804 CHECK_DIFF(RTStrNICmp("", NULL, RTSTR_MAX), > );
805 CHECK_DIFF(RTStrNICmp("", "", RTSTR_MAX), == );
806 CHECK_DIFF(RTStrNICmp(NULL, NULL, 0), == );
807 CHECK_DIFF(RTStrNICmp(NULL, "", 0), == );
808 CHECK_DIFF(RTStrNICmp("", NULL, 0), == );
809 CHECK_DIFF(RTStrNICmp("", "", 0), == );
810 CHECK_DIFF(RTStrNICmp("abcdef", "abcdef", RTSTR_MAX), == );
811 CHECK_DIFF(RTStrNICmp("abcdef", "abcde", RTSTR_MAX), > );
812 CHECK_DIFF(RTStrNICmp("abcde", "abcdef", RTSTR_MAX), < );
813 CHECK_DIFF(RTStrNICmp("abcdeg", "abcdef", RTSTR_MAX), > );
814 CHECK_DIFF(RTStrNICmp("abcdef", "abcdeg", RTSTR_MAX), < );
815
816 CHECK_DIFF(RTStrNICmp("abcdeF", "abcdef", RTSTR_MAX), ==);
817 CHECK_DIFF(RTStrNICmp("abcdef", "abcdeF", RTSTR_MAX), ==);
818 CHECK_DIFF(RTStrNICmp("ABCDEF", "abcdef", RTSTR_MAX), ==);
819 CHECK_DIFF(RTStrNICmp("abcdef", "ABCDEF", RTSTR_MAX), ==);
820 CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", RTSTR_MAX), ==);
821 CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", RTSTR_MAX), > );
822 CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", RTSTR_MAX), > ); /* diff performed on the lower case cp. */
823
824 CHECK_DIFF(RTStrNICmp("ABCDEF", "fedcba", 0), ==);
825 CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 5), ==);
826 CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", 5), ==);
827 CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDe", 5), ==);
828 CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDeF", 5), ==);
829 CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDe", 5), ==);
830 CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 6), > );
831 CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", 6), > ); /* diff performed on the lower case cp. */
832 /* We should continue using byte comparison when we hit the invalid CP. Will assert in debug builds. */
833 // CHECK_DIFF(RTStrNICmp("AbCd\xff""eg", "aBcD\xff""eF", 6), ==);
834
835 RTTestSubDone(hTest);
836}
837
838
839
840/**
841 * Check UTF-8 encoding purging.
842 */
843static void TstRTStrPurgeEncoding(RTTEST hTest)
844{
845 RTTestSub(hTest, "RTStrPurgeEncoding");
846
847 /*
848 * Test some good strings.
849 */
850 char sz1[] = "1234567890wertyuiopsdfghjklzxcvbnm";
851 char sz1Copy[sizeof(sz1)];
852 memcpy(sz1Copy, sz1, sizeof(sz1));
853
854 RTTESTI_CHECK_RETV(RTStrPurgeEncoding(sz1) == 0);
855 RTTESTI_CHECK_RETV(!memcmp(sz1, sz1Copy, sizeof(sz1)));
856
857 char *pszAll = RTStrDup(g_szAll);
858 if (pszAll)
859 {
860 RTTESTI_CHECK(RTStrPurgeEncoding(pszAll) == 0);
861 RTTESTI_CHECK(!memcmp(pszAll, g_szAll, sizeof(g_szAll)));
862 RTStrFree(pszAll);
863 }
864
865 /*
866 * Test some bad stuff.
867 */
868 struct
869 {
870 size_t cErrors;
871 unsigned char szIn[5];
872 const char *pszExpect;
873 } aTests[] =
874 {
875 { 0, { '1', '2', '3', '4', '\0' }, "1234" },
876 { 1, { 0x80, '2', '3', '4', '\0' }, "?234" },
877 { 1, { '1', 0x80, '3', '4', '\0' }, "1?34" },
878 { 1, { '1', '2', 0x80, '4', '\0' }, "12?4" },
879 { 1, { '1', '2', '3', 0x80, '\0' }, "123?" },
880 { 2, { 0x80, 0x81, '3', '4', '\0' }, "??34" },
881 { 2, { '1', 0x80, 0x81, '4', '\0' }, "1??4" },
882 { 2, { '1', '2', 0x80, 0x81, '\0' }, "12??" },
883 };
884 for (size_t i = 0; i < RT_ELEMENTS(aTests); i++)
885 {
886 size_t cErrors = RTStrPurgeEncoding((char *)aTests[i].szIn);
887 if (cErrors != aTests[i].cErrors)
888 RTTestFailed(hTest, "#%u: cErrors=%u expected %u\n", i, cErrors, aTests[i].cErrors);
889 else if (strcmp((char *)aTests[i].szIn, aTests[i].pszExpect))
890 RTTestFailed(hTest, "#%u: %.5Rhxs expected %.5Rhxs (%s)\n", i, aTests[i].szIn, aTests[i].pszExpect, aTests[i].pszExpect);
891 }
892
893 RTTestSubDone(hTest);
894}
895
896
897/**
898 * Check string sanitising.
899 */
900static void TstRTStrPurgeComplementSet(RTTEST hTest)
901{
902 RTTestSub(hTest, "RTStrPurgeComplementSet");
903 RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
904 '\0' };
905 RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
906 '7', '\0' }; /* Contains an incomplete pair. */
907 struct
908 {
909 const char *pcszIn;
910 const char *pcszOut;
911 PCRTUNICP pcCpSet;
912 char chReplacement;
913 ssize_t cExpected;
914 }
915 aTests[] =
916 {
917 { "1234werttrew4321", "1234werttrew4321", aCpSet, '_', 0 },
918 { "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
919 "123_54wert__trew___4321", aCpSet, '_', 3 },
920 { "hjhj8766", "????????", aCpSet, '?', 8 },
921 { "123\xf0\xa4\xad\xa2""4", "123____4", aCpSet, '_', 1 },
922 { "\xff", "\xff", aCpSet, '_', -1 },
923 { "____", "____", aCpBadSet, '_', -1 }
924 };
925 enum { MAX_IN_STRING = 256 };
926
927 for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
928 {
929 char szCopy[MAX_IN_STRING];
930 ssize_t cReplacements;
931 AssertRC(RTStrCopy(szCopy, RT_ELEMENTS(szCopy), aTests[i].pcszIn));
932 RTTestDisableAssertions(hTest);
933 cReplacements = RTStrPurgeComplementSet(szCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
934 RTTestRestoreAssertions(hTest);
935 if (cReplacements != aTests[i].cExpected)
936 RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
937 (long long) aTests[i].cExpected,
938 (long long) cReplacements);
939 if (strcmp(aTests[i].pcszOut, szCopy))
940 RTTestFailed(hTest, "#%u: expected %s, actual %s\n", i,
941 aTests[i].pcszOut, szCopy);
942 }
943}
944
945
946/**
947 * Check string sanitising.
948 */
949static void TstRTUtf16PurgeComplementSet(RTTEST hTest)
950{
951 RTTestSub(hTest, "RTUtf16PurgeComplementSet");
952 RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
953 '\0' };
954 RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
955 '7', '\0' }; /* Contains an incomplete pair. */
956 struct
957 {
958 const char *pcszIn;
959 const char *pcszOut;
960 size_t cwc; /* Zero means the strings are Utf-8. */
961 PCRTUNICP pcCpSet;
962 char chReplacement;
963 ssize_t cExpected;
964 }
965 aTests[] =
966 {
967 { "1234werttrew4321", "1234werttrew4321", 0, aCpSet, '_', 0 },
968 { "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
969 "123_54wert_trew_4321", 0, aCpSet, '_', 3 },
970 { "hjhj8766", "????????", 0, aCpSet, '?', 8 },
971 { "123\xf0\xa4\xad\xa2""4", "123__4", 0, aCpSet, '_', 1 },
972 { "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
973 { "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
974 { "____", "____", 0, aCpBadSet, '_', -1 }
975 };
976 enum { MAX_IN_STRING = 256 };
977
978 for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
979 {
980 RTUTF16 wszInCopy[MAX_IN_STRING], *pwszInCopy = wszInCopy;
981 RTUTF16 wszOutCopy[MAX_IN_STRING], *pwszOutCopy = wszOutCopy;
982 ssize_t cReplacements;
983 if (!aTests[i].cwc)
984 {
985 AssertRC(RTStrToUtf16Ex(aTests[i].pcszIn, RTSTR_MAX, &pwszInCopy,
986 RT_ELEMENTS(wszInCopy), NULL));
987 AssertRC(RTStrToUtf16Ex(aTests[i].pcszOut, RTSTR_MAX, &pwszOutCopy,
988 RT_ELEMENTS(wszOutCopy), NULL));
989 }
990 else
991 {
992 Assert(aTests[i].cwc <= RT_ELEMENTS(wszInCopy));
993 memcpy(wszInCopy, aTests[i].pcszIn, aTests[i].cwc * 2);
994 memcpy(wszOutCopy, aTests[i].pcszOut, aTests[i].cwc * 2);
995 }
996
997 RTTestDisableAssertions(hTest);
998 cReplacements = RTUtf16PurgeComplementSet(wszInCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
999 RTTestRestoreAssertions(hTest);
1000
1001 if (cReplacements != aTests[i].cExpected)
1002 RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
1003 (long long) aTests[i].cExpected,
1004 (long long) cReplacements);
1005 if (RTUtf16Cmp(wszInCopy, wszOutCopy))
1006 RTTestFailed(hTest, "#%u: expected %ls, actual %ls\n", i,
1007 wszOutCopy, wszInCopy);
1008 }
1009}
1010
1011
1012/**
1013 * Benchmark stuff.
1014 */
1015static void Benchmarks(RTTEST hTest)
1016{
1017 static union
1018 {
1019 RTUTF16 wszBuf[sizeof(g_wszAll)];
1020 char szBuf[sizeof(g_szAll)];
1021 } s_Buf;
1022
1023 RTTestSub(hTest, "Benchmarks");
1024/** @todo add RTTest* methods for reporting benchmark results. */
1025 RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTStrToUtf16Ex: "); /** @todo figure this stuff into the test framework. */
1026 PRTUTF16 pwsz = &s_Buf.wszBuf[0];
1027 int rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1028 if (RT_SUCCESS(rc))
1029 {
1030 int i;
1031 uint64_t u64Start = RTTimeNanoTS();
1032 for (i = 0; i < 100; i++)
1033 {
1034 rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1035 if (RT_FAILURE(rc))
1036 {
1037 RTTestFailed(hTest, "UTF-8 -> UTF-16 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1038 break;
1039 }
1040 }
1041 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1042 RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1043 }
1044
1045 RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTUtf16ToUtf8Ex: ");
1046 char *psz = &s_Buf.szBuf[0];
1047 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1048 if (RT_SUCCESS(rc))
1049 {
1050 int i;
1051 uint64_t u64Start = RTTimeNanoTS();
1052 for (i = 0; i < 100; i++)
1053 {
1054 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1055 if (RT_FAILURE(rc))
1056 {
1057 RTTestFailed(hTest, "UTF-16 -> UTF-8 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1058 break;
1059 }
1060 }
1061 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1062 RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1063 }
1064
1065 RTTestSubDone(hTest);
1066}
1067
1068
1069/**
1070 * Tests RTStrEnd
1071 */
1072static void testStrEnd(RTTEST hTest)
1073{
1074 RTTestSub(hTest, "RTStrEnd");
1075
1076 static char const s_szEmpty[1] = "";
1077 RTTESTI_CHECK(RTStrEnd(s_szEmpty, 0) == NULL);
1078 RTTESTI_CHECK(RTStrEnd(s_szEmpty, 1) == &s_szEmpty[0]);
1079 for (size_t i = 0; i < _1M; i++)
1080 RTTESTI_CHECK(RTStrEnd(s_szEmpty, ~i) == &s_szEmpty[0]);
1081
1082 /* Check the implementation won't ever overshoot the '\0' in the input in
1083 anyway that may lead to a SIGSEV. (VC++ 14.1 does this) */
1084 size_t const cchStr = 1023;
1085 char *pszStr = (char *)RTTestGuardedAllocTail(hTest, cchStr + 1);
1086 memset(pszStr, ' ', cchStr);
1087 char * const pszStrEnd = &pszStr[cchStr];
1088 *pszStrEnd = '\0';
1089 RTTEST_CHECK_RETV(hTest, strlen(pszStr) == cchStr);
1090
1091 for (size_t off = 0; off <= cchStr; off++)
1092 {
1093 RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], cchStr + 1 - off) == pszStrEnd);
1094 RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], RTSTR_MAX) == pszStrEnd);
1095
1096 RTTEST_CHECK(hTest, memchr(&pszStr[off], '\0', cchStr + 1 - off) == pszStrEnd);
1097 RTTEST_CHECK(hTest, strchr(&pszStr[off], '\0') == pszStrEnd);
1098 RTTEST_CHECK(hTest, strchr(&pszStr[off], '?') == NULL);
1099
1100 size_t cchMax = 0;
1101 for (; cchMax <= cchStr - off; cchMax++)
1102 {
1103 const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1104 if (pszRet != NULL)
1105 {
1106 RTTestFailed(hTest, "off=%zu cchMax=%zu: %p, expected NULL\n", off, cchMax, pszRet);
1107 break;
1108 }
1109 }
1110 for (; cchMax <= _8K; cchMax++)
1111 {
1112 const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1113 if (pszRet != pszStrEnd)
1114 {
1115 RTTestFailed(hTest, "off=%zu cchMax=%zu: off by %p\n", off, cchMax, pszRet);
1116 break;
1117 }
1118 }
1119 }
1120 RTTestGuardedFree(hTest, pszStr);
1121}
1122
1123
1124/**
1125 * Tests RTStrStr and RTStrIStr.
1126 */
1127static void testStrStr(RTTEST hTest)
1128{
1129#define CHECK_NULL(expr) \
1130 do { \
1131 const char *pszRet = expr; \
1132 if (pszRet != NULL) \
1133 RTTestFailed(hTest, "%d: %s -> %s expected NULL", __LINE__, #expr, pszRet); \
1134 } while (0)
1135
1136#define CHECK(expr, expect) \
1137 do { \
1138 const char * const pszRet = expr; \
1139 const char * const pszExpect = (expect); \
1140 if ( (pszRet != NULL && pszExpect == NULL) \
1141 || (pszRet == NULL && pszExpect != NULL) \
1142 || strcmp(pszRet, pszExpect) \
1143 ) \
1144 RTTestFailed(hTest, "%d: %s -> %s expected %s", __LINE__, #expr, pszRet, pszExpect); \
1145 } while (0)
1146
1147
1148 RTTestSub(hTest, "RTStrStr");
1149 CHECK(RTStrStr("abcdef", ""), "abcdef");
1150 CHECK_NULL(RTStrStr("abcdef", NULL));
1151 CHECK_NULL(RTStrStr(NULL, ""));
1152 CHECK_NULL(RTStrStr(NULL, NULL));
1153 CHECK(RTStrStr("abcdef", "abcdef"), "abcdef");
1154 CHECK(RTStrStr("abcdef", "b"), "bcdef");
1155 CHECK(RTStrStr("abcdef", "bcdef"), "bcdef");
1156 CHECK(RTStrStr("abcdef", "cdef"), "cdef");
1157 CHECK(RTStrStr("abcdef", "cde"), "cdef");
1158 CHECK(RTStrStr("abcdef", "cd"), "cdef");
1159 CHECK(RTStrStr("abcdef", "c"), "cdef");
1160 CHECK(RTStrStr("abcdef", "f"), "f");
1161 CHECK(RTStrStr("abcdef", "ef"), "ef");
1162 CHECK(RTStrStr("abcdef", "e"), "ef");
1163 CHECK_NULL(RTStrStr("abcdef", "z"));
1164 CHECK_NULL(RTStrStr("abcdef", "A"));
1165 CHECK_NULL(RTStrStr("abcdef", "F"));
1166
1167 RTTestSub(hTest, "RTStrIStr");
1168 CHECK(RTStrIStr("abcdef", ""), "abcdef");
1169 CHECK_NULL(RTStrIStr("abcdef", NULL));
1170 CHECK_NULL(RTStrIStr(NULL, ""));
1171 CHECK_NULL(RTStrIStr(NULL, NULL));
1172 CHECK(RTStrIStr("abcdef", "abcdef"), "abcdef");
1173 CHECK(RTStrIStr("abcdef", "Abcdef"), "abcdef");
1174 CHECK(RTStrIStr("abcdef", "ABcDeF"), "abcdef");
1175 CHECK(RTStrIStr("abcdef", "b"), "bcdef");
1176 CHECK(RTStrIStr("abcdef", "B"), "bcdef");
1177 CHECK(RTStrIStr("abcdef", "bcdef"), "bcdef");
1178 CHECK(RTStrIStr("abcdef", "BCdEf"), "bcdef");
1179 CHECK(RTStrIStr("abcdef", "bCdEf"), "bcdef");
1180 CHECK(RTStrIStr("abcdef", "bcdEf"), "bcdef");
1181 CHECK(RTStrIStr("abcdef", "BcdEf"), "bcdef");
1182 CHECK(RTStrIStr("abcdef", "cdef"), "cdef");
1183 CHECK(RTStrIStr("abcdef", "cde"), "cdef");
1184 CHECK(RTStrIStr("abcdef", "cd"), "cdef");
1185 CHECK(RTStrIStr("abcdef", "c"), "cdef");
1186 CHECK(RTStrIStr("abcdef", "f"), "f");
1187 CHECK(RTStrIStr("abcdeF", "F"), "F");
1188 CHECK(RTStrIStr("abcdef", "F"), "f");
1189 CHECK(RTStrIStr("abcdef", "ef"), "ef");
1190 CHECK(RTStrIStr("EeEef", "e"), "EeEef");
1191 CHECK(RTStrIStr("EeEef", "E"), "EeEef");
1192 CHECK(RTStrIStr("EeEef", "EE"), "EeEef");
1193 CHECK(RTStrIStr("EeEef", "EEE"), "EeEef");
1194 CHECK(RTStrIStr("EeEef", "EEEF"), "eEef");
1195 CHECK_NULL(RTStrIStr("EeEef", "z"));
1196
1197#undef CHECK
1198#undef CHECK_NULL
1199 RTTestSubDone(hTest);
1200}
1201
1202
1203static void testUtf8Latin1(RTTEST hTest)
1204{
1205 RTTestSub(hTest, "Latin-1 <-> Utf-8 conversion functions");
1206
1207 /* Test Utf8 -> Latin1 */
1208 size_t cch_szAll = 0;
1209 size_t cbShort = RTStrCalcLatin1Len(g_szAll);
1210 RTTEST_CHECK(hTest, cbShort == 0);
1211 int rc = RTStrCalcLatin1LenEx(g_szAll, 383, &cch_szAll);
1212 RTTEST_CHECK(hTest, (cch_szAll == 255));
1213 rc = RTStrCalcLatin1LenEx(g_szAll, RTSTR_MAX, &cch_szAll);
1214 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1215 char *psz = NULL;
1216 char szShort[256] = { 0 };
1217 memcpy(szShort, g_szAll, 255);
1218 cbShort = RTStrCalcLatin1Len(szShort);
1219 RTTEST_CHECK(hTest, cbShort == 191);
1220 rc = RTStrToLatin1(szShort, &psz);
1221 RTTEST_CHECK_RC_OK(hTest, rc);
1222 if (RT_SUCCESS(rc))
1223 {
1224 RTTEST_CHECK(hTest, (strlen(psz) == 191));
1225 for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1226 if (psz[i] != (char) j)
1227 {
1228 RTTestFailed(hTest, "conversion of g_szAll to Latin1 failed at position %u\n", i);
1229 break;
1230 }
1231 }
1232 RTStrFree(psz);
1233 rc = RTStrToLatin1(g_szAll, &psz);
1234 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1235 char sz[512];
1236 char *psz2 = &sz[0];
1237 size_t cchActual = 0;
1238 rc = RTStrToLatin1Ex(g_szAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1239 &cchActual);
1240 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1241 RTTEST_CHECK_MSG(hTest, cchActual == 0,
1242 (hTest, "cchActual=%lu\n", cchActual));
1243 rc = RTStrToLatin1Ex(g_szAll, 383, &psz2, sizeof(sz),
1244 &cchActual);
1245 RTTEST_CHECK_RC_OK(hTest, rc);
1246 if (RT_SUCCESS(rc))
1247 {
1248 RTTEST_CHECK(hTest, (cchActual == 255));
1249 RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1250 for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1251 if (psz2[i] != (char) j)
1252 {
1253 RTTestFailed(hTest, "second conversion of g_szAll to Latin1 failed at position %u\n", i);
1254 break;
1255 }
1256 }
1257 rc = RTStrToLatin1Ex(g_szAll, 129, &psz2, 128, &cchActual);
1258 RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1259 RTTEST_CHECK_MSG(hTest, cchActual == 128,
1260 (hTest, "cchActual=%lu\n", cchActual));
1261 rc = RTStrToLatin1Ex(g_szAll, 383, &psz, 0, &cchActual);
1262 RTTEST_CHECK_RC_OK(hTest, rc);
1263 if (RT_SUCCESS(rc))
1264 {
1265 RTTEST_CHECK(hTest, (cchActual == 255));
1266 RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1267 for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1268 if ( ((j < 0x100) && (psz[i] != (char) j))
1269 || ((j > 0xff) && psz[i] != '?'))
1270 {
1271 RTTestFailed(hTest, "third conversion of g_szAll to Latin1 failed at position %u\n", i);
1272 break;
1273 }
1274 }
1275 const char *pszBad = "Hello\xDC\xD8";
1276 rc = RTStrToLatin1Ex(pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1277 &cchActual);
1278 RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF8_ENCODING);
1279 RTStrFree(psz);
1280
1281 /* Test Latin1 -> Utf8 */
1282 const char *pszLat1 = "\x01\x20\x40\x80\x81";
1283 RTTEST_CHECK(hTest, RTLatin1CalcUtf8Len(pszLat1) == 7);
1284 rc = RTLatin1CalcUtf8LenEx(pszLat1, 3, &cchActual);
1285 RTTEST_CHECK_RC_OK(hTest, rc);
1286 if (RT_SUCCESS(rc))
1287 RTTEST_CHECK(hTest, cchActual == 3);
1288 rc = RTLatin1CalcUtf8LenEx(pszLat1, RTSTR_MAX, &cchActual);
1289 RTTEST_CHECK_RC_OK(hTest, rc);
1290 if (RT_SUCCESS(rc))
1291 RTTEST_CHECK(hTest, cchActual == 7);
1292 char *pch = NULL;
1293 char ch[8];
1294 char *pch2 = &ch[0];
1295 cchActual = 0;
1296 rc = RTLatin1ToUtf8(pszLat1, &pch);
1297 RTTEST_CHECK_RC_OK(hTest, rc);
1298 if (RT_SUCCESS(rc))
1299 RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1300 RTStrFree(pch);
1301 rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, &cchActual);
1302 RTTEST_CHECK_RC_OK(hTest, rc);
1303 if (RT_SUCCESS(rc))
1304 {
1305 RTTEST_CHECK(hTest, (cchActual == 7));
1306 RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1307 }
1308 RTStrFree(pch);
1309 rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, NULL);
1310 RTTEST_CHECK_RC_OK(hTest, rc);
1311 if (RT_SUCCESS(rc))
1312 RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1313 RTStrFree(pch);
1314 rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch),
1315 &cchActual);
1316 RTTEST_CHECK_RC_OK(hTest, rc);
1317 if (RT_SUCCESS(rc))
1318 {
1319 RTTEST_CHECK(hTest, (cchActual == 7));
1320 RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40\xC2\x80\xC2\x81"));
1321 }
1322 rc = RTLatin1ToUtf8Ex(pszLat1, 3, &pch2, RT_ELEMENTS(ch),
1323 &cchActual);
1324 RTTEST_CHECK_RC_OK(hTest, rc);
1325 if (RT_SUCCESS(rc))
1326 {
1327 RTTEST_CHECK(hTest, (cchActual == 3));
1328 RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40"));
1329 }
1330 rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch) - 1,
1331 &cchActual);
1332 RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1333 RTTEST_CHECK(hTest, (cchActual == 7));
1334 RTTestSubDone(hTest);
1335}
1336
1337
1338static void testUtf16Latin1(RTTEST hTest)
1339{
1340 RTTestSub(hTest, "Latin-1 <-> Utf-16 conversion functions");
1341
1342 /* Test Utf16 -> Latin1 */
1343 size_t cch_szAll = 0;
1344 size_t cbShort = RTUtf16CalcLatin1Len(g_wszAll);
1345 RTTEST_CHECK(hTest, cbShort == 0);
1346 int rc = RTUtf16CalcLatin1LenEx(g_wszAll, 255, &cch_szAll);
1347 RTTEST_CHECK(hTest, (cch_szAll == 255));
1348 rc = RTUtf16CalcLatin1LenEx(g_wszAll, RTSTR_MAX, &cch_szAll);
1349 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1350 char *psz = NULL;
1351 RTUTF16 wszShort[256] = { 0 };
1352 for (unsigned i = 0; i < 255; ++i)
1353 wszShort[i] = i + 1;
1354 cbShort = RTUtf16CalcLatin1Len(wszShort);
1355 RTTEST_CHECK(hTest, cbShort == 255);
1356 rc = RTUtf16ToLatin1(wszShort, &psz);
1357 RTTEST_CHECK_RC_OK(hTest, rc);
1358 if (RT_SUCCESS(rc))
1359 {
1360 RTTEST_CHECK(hTest, (strlen(psz) == 255));
1361 for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1362 if (psz[i] != (char) j)
1363 {
1364 RTTestFailed(hTest, "conversion of g_wszAll to Latin1 failed at position %u\n", i);
1365 break;
1366 }
1367 }
1368 RTStrFree(psz);
1369 rc = RTUtf16ToLatin1(g_wszAll, &psz);
1370 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1371 char sz[512];
1372 char *psz2 = &sz[0];
1373 size_t cchActual = 0;
1374 rc = RTUtf16ToLatin1Ex(g_wszAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1375 &cchActual);
1376 RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1377 RTTEST_CHECK_MSG(hTest, cchActual == 0,
1378 (hTest, "cchActual=%lu\n", cchActual));
1379 rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz2, sizeof(sz),
1380 &cchActual);
1381 RTTEST_CHECK_RC_OK(hTest, rc);
1382 if (RT_SUCCESS(rc))
1383 {
1384 RTTEST_CHECK(hTest, (cchActual == 255));
1385 RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1386 for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1387 if (psz2[i] != (char) j)
1388 {
1389 RTTestFailed(hTest, "second conversion of g_wszAll to Latin1 failed at position %u\n", i);
1390 break;
1391 }
1392 }
1393 rc = RTUtf16ToLatin1Ex(g_wszAll, 128, &psz2, 128, &cchActual);
1394 RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1395 RTTEST_CHECK_MSG(hTest, cchActual == 128,
1396 (hTest, "cchActual=%lu\n", cchActual));
1397 rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz, 0, &cchActual);
1398 RTTEST_CHECK_RC_OK(hTest, rc);
1399 if (RT_SUCCESS(rc))
1400 {
1401 RTTEST_CHECK(hTest, (cchActual == 255));
1402 RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1403 for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1404 if ( ((j < 0x100) && (psz[i] != (char) j))
1405 || ((j > 0xff) && psz[i] != '?'))
1406 {
1407 RTTestFailed(hTest, "third conversion of g_wszAll to Latin1 failed at position %u\n", i);
1408 break;
1409 }
1410 }
1411 const char *pszBad = "H\0e\0l\0l\0o\0\0\xDC\0\xD8\0";
1412 rc = RTUtf16ToLatin1Ex((RTUTF16 *) pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1413 &cchActual);
1414 RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF16_ENCODING);
1415 RTStrFree(psz);
1416
1417 /* Test Latin1 -> Utf16 */
1418 const char *pszLat1 = "\x01\x20\x40\x80\x81";
1419 RTTEST_CHECK(hTest, RTLatin1CalcUtf16Len(pszLat1) == 5);
1420 rc = RTLatin1CalcUtf16LenEx(pszLat1, 3, &cchActual);
1421 RTTEST_CHECK_RC_OK(hTest, rc);
1422 if (RT_SUCCESS(rc))
1423 RTTEST_CHECK(hTest, cchActual == 3);
1424 rc = RTLatin1CalcUtf16LenEx(pszLat1, RTSTR_MAX, &cchActual);
1425 RTTEST_CHECK_RC_OK(hTest, rc);
1426 if (RT_SUCCESS(rc))
1427 RTTEST_CHECK(hTest, cchActual == 5);
1428 RTUTF16 *pwc = NULL;
1429 RTUTF16 wc[6];
1430 RTUTF16 *pwc2 = &wc[0];
1431 size_t cwActual = 0;
1432 rc = RTLatin1ToUtf16(pszLat1, &pwc);
1433 RTTEST_CHECK_RC_OK(hTest, rc);
1434 if (RT_SUCCESS(rc))
1435 RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1436 && (pwc[2] == 0x40) && (pwc[3] == 0x80)
1437 && (pwc[4] == 0x81) && (pwc[5] == '\0'));
1438 RTUtf16Free(pwc);
1439 rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, &cwActual);
1440 RTTEST_CHECK_RC_OK(hTest, rc);
1441 if (RT_SUCCESS(rc))
1442 {
1443 RTTEST_CHECK(hTest, (cwActual == 5));
1444 RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1445 && (pwc[2] == 0x40) && (pwc[3] == 0x80)
1446 && (pwc[4] == 0x81) && (pwc[5] == '\0'));
1447 }
1448 RTUtf16Free(pwc);
1449 rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, NULL);
1450 RTTEST_CHECK_RC_OK(hTest, rc);
1451 if (RT_SUCCESS(rc))
1452 RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1453 && (pwc[2] == 0x40) && (pwc[3] == 0x80)
1454 && (pwc[4] == 0x81) && (pwc[5] == '\0'));
1455 RTUtf16Free(pwc);
1456 rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc),
1457 &cwActual);
1458 RTTEST_CHECK_RC_OK(hTest, rc);
1459 if (RT_SUCCESS(rc))
1460 {
1461 RTTEST_CHECK(hTest, (cwActual == 5));
1462 RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1463 && (wc[2] == 0x40) && (wc[3] == 0x80)
1464 && (wc[4] == 0x81) && (wc[5] == '\0'));
1465 }
1466 rc = RTLatin1ToUtf16Ex(pszLat1, 3, &pwc2, RT_ELEMENTS(wc),
1467 &cwActual);
1468 RTTEST_CHECK_RC_OK(hTest, rc);
1469 if (RT_SUCCESS(rc))
1470 {
1471 RTTEST_CHECK(hTest, (cwActual == 3));
1472 RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1473 && (wc[2] == 0x40) && (wc[3] == '\0'));
1474 }
1475 rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc) - 1,
1476 &cwActual);
1477 RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1478 RTTEST_CHECK(hTest, (cwActual == 5));
1479 RTTestSubDone(hTest);
1480}
1481
1482
1483static void testNoTranslation(RTTEST hTest)
1484{
1485 /*
1486 * Try trigger a VERR_NO_TRANSLATION error in convert to
1487 * current CP to latin-1.
1488 *
1489 * On Windows / DOS OSes this is codepage 850.
1490 *
1491 * Note! On Windows-y systems there ALWAYS are two codepages active:
1492 * the OEM codepage for legacy (console) applications, and the ACP (ANSI CodePage).
1493 * 'chcp' only will tell you the OEM codepage, however.
1494 */
1495
1496 /* Unicode code points (some of it on 2300-23FF -> misc. technical) to try. */
1497 const RTUTF16 s_swzTest1[] = { 0x2358, 0x2242, 0x2357, 0x2359, 0x22f9, 0x2c4e, 0x0030, 0x0060,
1498 0x0092, 0x00c1, 0x00f2, 0x1f80, 0x0088, 0x2c38, 0x2c30, 0x0000 };
1499 char *pszTest1;
1500 int rc = RTUtf16ToUtf8(s_swzTest1, &pszTest1);
1501 RTTESTI_CHECK_RC_RETV(rc, VINF_SUCCESS);
1502
1503#ifdef RT_OS_WINDOWS
1504 UINT const uACP = GetACP();
1505 RTTestIPrintf(RTTESTLVL_ALWAYS, "Current Windows ANSI codepage is: %u%s\n",
1506 uACP, uACP == 65001 /* UTF-8 */ ? " (UTF-8)" : "");
1507#endif
1508
1509 RTTestSub(hTest, "VERR_NO_TRANSLATION/RTStrUtf8ToCurrentCP");
1510 char *pszOut;
1511 rc = RTStrUtf8ToCurrentCP(&pszOut, pszTest1);
1512 if (rc == VINF_SUCCESS)
1513 {
1514 RTTestIPrintf(RTTESTLVL_ALWAYS, "CurrentCP is UTF-8 or similar (LC_ALL=%s LANG=%s LC_CTYPE=%s)\n",
1515 RTEnvGet("LC_ALL"), RTEnvGet("LANG"), RTEnvGet("LC_CTYPE"));
1516#ifdef RT_OS_WINDOWS
1517 if (uACP == 65001 /* UTF-8 */)
1518 {
1519 /* The following string comparison will fail if the active ACP isn't UTF-8 (65001), so skip this then.
1520 * This applies to older Windows OSes like NT4. */
1521#endif
1522 if (strcmp(pszOut, pszTest1))
1523 RTTestFailed(hTest, "mismatch\nutf8: %.*Rhxs\n got: %.*Rhxs\n", strlen(pszTest1), pszTest1, strlen(pszOut), pszOut);
1524#ifdef RT_OS_WINDOWS
1525 }
1526#endif
1527 RTStrFree(pszOut);
1528 }
1529 else
1530 RTTESTI_CHECK_MSG(rc == VWRN_NO_TRANSLATION || rc == VERR_NO_TRANSLATION, ("rc=%Rrc\n", rc));
1531
1532 RTTestSub(hTest, "VERR_NO_TRANSLATION/RTUtf16ToLatin1");
1533 rc = RTUtf16ToLatin1(s_swzTest1, &pszOut);
1534 RTTESTI_CHECK_RC(rc, VERR_NO_TRANSLATION);
1535 if (RT_SUCCESS(rc))
1536 RTStrFree(pszOut);
1537
1538 RTStrFree(pszTest1);
1539 RTTestSubDone(hTest);
1540}
1541
1542static void testGetPut(RTTEST hTest)
1543{
1544 /*
1545 * Test RTStrPutCp, RTStrGetCp and RTStrGetCpEx.
1546 */
1547 RTTestSub(hTest, "RTStrPutCp, RTStrGetCp and RTStrGetCpEx");
1548
1549 RTUNICP uc = 0;
1550 while (uc <= 0x10fffd)
1551 {
1552 /* Figure the range - skip illegal ranges. */
1553 RTUNICP ucFirst = uc;
1554 if (ucFirst - UINT32_C(0xd800) <= 0x7ff)
1555 ucFirst = 0xe000;
1556 else if (ucFirst == UINT32_C(0xfffe) || ucFirst == UINT32_C(0xffff))
1557 ucFirst = 0x10000;
1558
1559 RTUNICP ucLast = ucFirst + 1023;
1560 if (ucLast - UINT32_C(0xd800) <= 0x7ff)
1561 ucLast = 0xd7ff;
1562 else if (ucLast == UINT32_C(0xfffe) || ucLast == UINT32_C(0xffff))
1563 ucLast = 0xfffd;
1564
1565 /* Encode the range into a string, decode each code point as we go along. */
1566 char sz1[8192];
1567 char *pszDst = sz1;
1568 for (uc = ucFirst; uc <= ucLast; uc++)
1569 {
1570 char *pszBefore = pszDst;
1571 pszDst = RTStrPutCp(pszDst, uc);
1572 RTTESTI_CHECK(pszBefore - pszDst < 6);
1573
1574 RTUNICP uc2 = RTStrGetCp(pszBefore);
1575 RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1576
1577 const char *pszSrc = pszBefore;
1578 RTUNICP uc3 = 42;
1579 RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1580 RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1581 RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1582 }
1583
1584 /* Decode and re-encode it. */
1585 const char *pszSrc = pszDst = sz1;
1586 for (uc = ucFirst; uc <= ucLast; uc++)
1587 {
1588 RTUNICP uc2 = RTStrGetCp(pszSrc);
1589 RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1590
1591 RTUNICP uc3 = 42;
1592 RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1593 RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1594
1595 pszDst = RTStrPutCp(pszDst, uc);
1596 RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1597 pszSrc = pszDst;
1598 }
1599
1600 /* Decode and wipe it (checking compiler optimizations). */
1601 pszSrc = pszDst = sz1;
1602 for (uc = ucFirst; uc <= ucLast; uc++)
1603 {
1604 RTUNICP uc2 = RTStrGetCp(pszSrc);
1605 RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1606
1607 RTUNICP uc3 = 42;
1608 RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1609 RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1610
1611 pszDst = RTStrPutCp(pszDst, 0);
1612 }
1613
1614 /* advance */
1615 uc = ucLast + 1;
1616 }
1617
1618}
1619
1620
1621int main()
1622{
1623 /*
1624 * Init the runtime, test and say hello.
1625 */
1626 RTTEST hTest;
1627 RTEXITCODE rcExit = RTTestInitAndCreate("tstUtf8", &hTest);
1628 if (rcExit != RTEXITCODE_SUCCESS)
1629 return rcExit;
1630 RTTestBanner(hTest);
1631
1632 /*
1633 * Run the tests.
1634 */
1635 InitStrings();
1636 test1(hTest);
1637 test2(hTest);
1638 test3(hTest);
1639 TstRTStrXCmp(hTest);
1640 TstRTStrPurgeEncoding(hTest);
1641 /* TstRT*PurgeComplementSet test conditions which assert. */
1642 TstRTStrPurgeComplementSet(hTest);
1643 TstRTUtf16PurgeComplementSet(hTest);
1644 testStrEnd(hTest);
1645 testStrStr(hTest);
1646 testUtf8Latin1(hTest);
1647 testUtf16Latin1(hTest);
1648 testNoTranslation(hTest);
1649 testGetPut(hTest);
1650
1651 Benchmarks(hTest);
1652
1653 /*
1654 * Summary
1655 */
1656 return RTTestSummaryAndDestroy(hTest);
1657}
1658
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette