VirtualBox

source: vbox/trunk/src/VBox/Runtime/testcase/tstUtf8.cpp@ 4968

Last change on this file since 4968 was 4071, checked in by vboxsync, 17 years ago

Biggest check-in ever. New source code headers for all (C) innotek files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 26.7 KB
Line 
1/* $Id: tstUtf8.cpp 4071 2007-08-07 17:07:59Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime Testcase - UTF-8 and UTF-16 string conversions.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#include <iprt/string.h>
22#include <iprt/uni.h>
23#include <iprt/runtime.h>
24#include <iprt/uuid.h>
25#include <iprt/time.h>
26#include <iprt/stream.h>
27#include <iprt/alloc.h>
28#include <iprt/assert.h>
29#include <iprt/err.h>
30
31#include <stdlib.h>
32
33
34/*******************************************************************************
35* Global Variables *
36*******************************************************************************/
37static int g_cErrors = 0;
38
39
40/**
41 * Generate a random codepoint for simple UTF-16 encoding.
42 */
43static RTUTF16 GetRandUcs2(void)
44{
45 RTUTF16 wc;
46 do
47 {
48 wc = (RTUTF16)((long long)rand() * 0xffff / RAND_MAX);
49 } while ((wc >= 0xd800 && wc <= 0xdfff) || wc == 0);
50 return wc;
51}
52
53
54/**
55 *
56 */
57static void test1(void)
58{
59 static const char s_szBadString1[] = "Bad \xe0\x13\x0";
60 static const char s_szBadString2[] = "Bad \xef\xbf\xc3";
61 int rc;
62 char *pszUtf8;
63 char *pszCurrent;
64 PRTUTF16 pwsz;
65 PRTUTF16 pwszRand;
66
67 RTPrintf("tstUtf8: TEST 1\n");
68
69 /*
70 * Invalid UTF-8 to UCS-2 test.
71 */
72 rc = RTStrToUtf16(s_szBadString1, &pwsz);
73 if (rc != VERR_NO_TRANSLATION && rc != VERR_INVALID_UTF8_ENCODING)
74 {
75 RTPrintf("tstUtf8: FAILURE - %d: Conversion of first bad UTF-8 string to UTF-16 apparantly succeeded. It shouldn't. rc=%Vrc\n",
76 __LINE__, rc);
77 g_cErrors++;
78 }
79 rc = RTStrToUtf16(s_szBadString2, &pwsz);
80 if (rc != VERR_NO_TRANSLATION && rc != VERR_INVALID_UTF8_ENCODING)
81 {
82 RTPrintf("tstUtf8: FAILURE - %d: Conversion of second bad UTF-8 strings to UTF-16 apparantly succeeded. It shouldn't. rc=%Vrc\n",
83 __LINE__, rc);
84 g_cErrors++;
85 }
86
87 /*
88 * Test current CP convertion.
89 */
90 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
91 srand((unsigned)RTTimeNanoTS());
92 for (int i = 0; i < 30; i++)
93 pwszRand[i] = GetRandUcs2();
94 pwszRand[30] = 0;
95
96 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
97 if (rc == VINF_SUCCESS)
98 {
99 rc = RTStrUtf8ToCurrentCP(&pszCurrent, pszUtf8);
100 if (rc == VINF_SUCCESS)
101 {
102 rc = RTStrCurrentCPToUtf8(&pszUtf8, pszCurrent);
103 if (rc == VINF_SUCCESS)
104 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> Current -> UTF-8 successful.\n");
105 else
106 {
107 RTPrintf("tstUtf8: FAILURE - %d: The third part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
108 __LINE__, rc);
109 g_cErrors++;
110 }
111 }
112 else if (rc == VERR_NO_TRANSLATION)
113 RTPrintf("tstUtf8: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VERR_NO_TRANSLATION. This is probably as it should be.\n");
114 else
115 {
116 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
117 __LINE__, rc);
118 g_cErrors++;
119 }
120 }
121 else
122 {
123 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
124 __LINE__, rc);
125 g_cErrors++;
126 }
127
128 /*
129 * Generate a new random string.
130 */
131 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
132 srand((unsigned)RTTimeNanoTS());
133 for (int i = 0; i < 30; i++)
134 pwszRand[i] = GetRandUcs2();
135 pwszRand[30] = 0;
136 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
137 if (rc == VINF_SUCCESS)
138 {
139 rc = RTStrToUtf16(pszUtf8, &pwsz);
140 if (rc == VINF_SUCCESS)
141 {
142 int i;
143 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
144 /* nothing */;
145 if (pwszRand[i] == pwsz[i] && pwsz[i] == 0)
146 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> UTF-16 successful.\n");
147 else
148 {
149 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed.\n", __LINE__);
150 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
151 g_cErrors++;
152 }
153 }
154 else
155 {
156 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Vrc.\n",
157 __LINE__, rc);
158 g_cErrors++;
159 }
160 }
161 else
162 {
163 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Vrc.\n",
164 __LINE__, rc);
165 g_cErrors++;
166 }
167
168 /*
169 * Generate yet another random string and convert it to a buffer.
170 */
171 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
172 srand((unsigned)RTTimeNanoTS());
173 for (int i = 0; i < 30; i++)
174 pwszRand[i] = GetRandUcs2();
175 pwszRand[30] = 0;
176
177 char szUtf8Array[120];
178 char *pszUtf8Array = szUtf8Array;
179 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 120, NULL);
180 if (rc == 0)
181 {
182 rc = RTStrToUtf16(pszUtf8Array, &pwsz);
183 if (rc == 0)
184 {
185 int i;
186 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++);
187 if (pwsz[i] == 0 && i >= 8)
188 RTPrintf("tstUtf8: Random UTF-16 -> fixed length UTF-8 -> UTF-16 successful.\n");
189 else
190 {
191 RTPrintf("tstUtf8: FAILURE - %d: Incorrect conversion of UTF-16 -> fixed length UTF-8 -> UTF-16.\n", __LINE__);
192 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
193 g_cErrors++;
194 }
195 }
196 else
197 {
198 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Vrc.\n",
199 __LINE__, rc);
200 g_cErrors++;
201 }
202 }
203 else
204 {
205 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Vrc.\n",
206 __LINE__, rc);
207 g_cErrors++;
208 }
209
210 /*
211 * And again.
212 */
213 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
214 srand((unsigned)RTTimeNanoTS());
215 for (int i = 0; i < 30; i++)
216 pwszRand[i] = GetRandUcs2();
217 pwszRand[30] = 0;
218
219 RTUTF16 wszBuf[70];
220 PRTUTF16 pwsz2Buf = wszBuf;
221 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
222 if (rc == 0)
223 {
224 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 70, NULL);
225 if (rc == 0)
226 {
227 int i;
228 for (i = 0; pwszRand[i] == pwsz2Buf[i] && pwsz2Buf[i] != 0; i++);
229 if (pwszRand[i] == 0 && pwsz2Buf[i] == 0)
230 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> fixed length UTF-16 successful.\n");
231 else
232 {
233 RTPrintf("tstUtf8: FAILURE - %d: Incorrect conversion of random UTF-16 -> UTF-8 -> fixed length UTF-16.\n", __LINE__);
234 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz2Buf[i]);
235 g_cErrors++;
236 }
237 }
238 else
239 {
240 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
241 __LINE__, rc);
242 g_cErrors++;
243 }
244 }
245 else
246 {
247 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
248 __LINE__, rc);
249 g_cErrors++;
250 }
251 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
252 srand((unsigned)RTTimeNanoTS());
253 for (int i = 0; i < 30; i++)
254 pwszRand[i] = GetRandUcs2();
255 pwszRand[30] = 0;
256
257 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 20, NULL);
258 if (rc == VERR_BUFFER_OVERFLOW)
259 RTPrintf("tstUtf8: Random UTF-16 -> fixed length UTF-8 with too short buffer successfully rejected.\n");
260 else
261 {
262 RTPrintf("tstUtf8: FAILURE - %d: Random UTF-16 -> fixed length UTF-8 with too small buffer returned value %d instead of VERR_BUFFER_OVERFLOW.\n",
263 __LINE__, rc);
264 g_cErrors++;
265 }
266
267 /*
268 * last time...
269 */
270 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
271 srand((unsigned)RTTimeNanoTS());
272 for (int i = 0; i < 30; i++)
273 pwszRand[i] = GetRandUcs2();
274 pwszRand[30] = 0;
275
276 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
277 if (rc == VINF_SUCCESS)
278 {
279 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 20, NULL);
280 if (rc == VERR_BUFFER_OVERFLOW)
281 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer successfully rejected.\n");
282 else
283 {
284 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer returned value %Vrc instead of VERR_BUFFER_OVERFLOW.\n",
285 __LINE__, rc);
286 g_cErrors++;
287 }
288 }
289 else
290 {
291 RTPrintf("tstUtf8: FAILURE - %d:The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
292 __LINE__, rc);
293 g_cErrors++;
294 }
295
296}
297
298
299static RTUNICP g_uszAll[0x110000 - 1 - 0x800 - 2 + 1];
300static RTUTF16 g_wszAll[0xfffe - (0xe000 - 0xd800) + (0x110000 - 0x10000) * 2];
301static char g_szAll[0x7f + (0x800 - 0x80) * 2 + (0xfffe - 0x800 - (0xe000 - 0xd800))* 3 + (0x110000 - 0x10000) * 4 + 1];
302
303static void whereami(int cBits, size_t off)
304{
305 if (cBits == 8)
306 {
307 if (off < 0x7f)
308 RTPrintf("UTF-8 U+%#x\n", off + 1);
309 else if (off < 0xf7f)
310 RTPrintf("UTF-8 U+%#x\n", (off - 0x7f) / 2 + 0x80);
311 else if (off < 0x27f7f)
312 RTPrintf("UTF-8 U+%#x\n", (off - 0xf7f) / 3 + 0x800);
313 else if (off < 0x2df79)
314 RTPrintf("UTF-8 U+%#x\n", (off - 0x27f7f) / 3 + 0xe000);
315 else if (off < 0x42df79)
316 RTPrintf("UTF-8 U+%#x\n", (off - 0x2df79) / 4 + 0x10000);
317 else
318 RTPrintf("UTF-8 ???\n");
319 }
320 else if (cBits == 16)
321 {
322 if (off < 0xd7ff*2)
323 RTPrintf("UTF-16 U+%#x\n", off / 2 + 1);
324 else if (off < 0xf7fd*2)
325 RTPrintf("UTF-16 U+%#x\n", (off - 0xd7ff*2) / 2 + 0xe000);
326 else if (off < 0x20f7fd)
327 RTPrintf("UTF-16 U+%#x\n", (off - 0xf7fd*2) / 4 + 0x10000);
328 else
329 RTPrintf("UTF-16 ???\n");
330 }
331 else
332 {
333 if (off < (0xd800 - 1) * sizeof(RTUNICP))
334 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 1);
335 else if (off < (0xfffe - 0x800 - 1) * sizeof(RTUNICP))
336 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1);
337 else
338 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1 + 2);
339 }
340}
341
342int mymemcmp(const void *pv1, const void *pv2, size_t cb, int cBits)
343{
344 const uint8_t *pb1 = (const uint8_t *)pv1;
345 const uint8_t *pb2 = (const uint8_t *)pv2;
346 for (size_t off = 0; off < cb; off++)
347 {
348 if (pb1[off] != pb2[off])
349 {
350 RTPrintf("mismatch at %#x: ", off);
351 whereami(cBits, off);
352 RTPrintf(" %#x: %02x != %02x!\n", off-1, pb1[off-1], pb2[off-1]);
353 RTPrintf("*%#x: %02x != %02x!\n", off, pb1[off], pb2[off]);
354 RTPrintf(" %#x: %02x != %02x!\n", off+1, pb1[off+1], pb2[off+1]);
355 RTPrintf(" %#x: %02x != %02x!\n", off+2, pb1[off+2], pb2[off+2]);
356 RTPrintf(" %#x: %02x != %02x!\n", off+3, pb1[off+3], pb2[off+3]);
357 RTPrintf(" %#x: %02x != %02x!\n", off+4, pb1[off+4], pb2[off+4]);
358 RTPrintf(" %#x: %02x != %02x!\n", off+5, pb1[off+5], pb2[off+5]);
359 RTPrintf(" %#x: %02x != %02x!\n", off+6, pb1[off+6], pb2[off+6]);
360 RTPrintf(" %#x: %02x != %02x!\n", off+7, pb1[off+7], pb2[off+7]);
361 RTPrintf(" %#x: %02x != %02x!\n", off+8, pb1[off+8], pb2[off+8]);
362 RTPrintf(" %#x: %02x != %02x!\n", off+9, pb1[off+9], pb2[off+9]);
363 return 1;
364 }
365 }
366 return 0;
367}
368
369
370void InitStrings(void)
371{
372 /*
373 * Generate unicode string containing all the legal UTF-16 codepoints, both UTF-16 and UTF-8 version.
374 */
375 /* the simple code point array first */
376 unsigned i = 0;
377 RTUNICP uc = 1;
378 while (uc < 0xd800)
379 g_uszAll[i++] = uc++;
380 uc = 0xe000;
381 while (uc < 0xfffe)
382 g_uszAll[i++] = uc++;
383 uc = 0x10000;
384 while (uc < 0x110000)
385 g_uszAll[i++] = uc++;
386 g_uszAll[i++] = 0;
387 Assert(ELEMENTS(g_uszAll) == i);
388
389 /* the utf-16 one */
390 i = 0;
391 uc = 1;
392 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
393 while (uc < 0xd800)
394 g_wszAll[i++] = uc++;
395 uc = 0xe000;
396 //RTPrintf(" %#x=%#x", i, uc);
397 while (uc < 0xfffe)
398 g_wszAll[i++] = uc++;
399 uc = 0x10000;
400 //RTPrintf(" %#x=%#x", i, uc);
401 while (uc < 0x110000)
402 {
403 g_wszAll[i++] = 0xd800 | ((uc - 0x10000) >> 10);
404 g_wszAll[i++] = 0xdc00 | ((uc - 0x10000) & 0x3ff);
405 uc++;
406 }
407 //RTPrintf(" %#x=%#x\n", i, uc);
408 g_wszAll[i++] = '\0';
409 Assert(ELEMENTS(g_wszAll) == i);
410
411 /*
412 * The utf-8 one
413 */
414 i = 0;
415 uc = 1;
416 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
417 while (uc < 0x80)
418 g_szAll[i++] = uc++;
419 //RTPrintf(" %#x=%#x", i, uc);
420 while (uc < 0x800)
421 {
422 g_szAll[i++] = 0xc0 | (uc >> 6);
423 g_szAll[i++] = 0x80 | (uc & 0x3f);
424 Assert(!((uc >> 6) & ~0x1f));
425 uc++;
426 }
427 //RTPrintf(" %#x=%#x", i, uc);
428 while (uc < 0xd800)
429 {
430 g_szAll[i++] = 0xe0 | (uc >> 12);
431 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
432 g_szAll[i++] = 0x80 | (uc & 0x3f);
433 Assert(!((uc >> 12) & ~0xf));
434 uc++;
435 }
436 uc = 0xe000;
437 //RTPrintf(" %#x=%#x", i, uc);
438 while (uc < 0xfffe)
439 {
440 g_szAll[i++] = 0xe0 | (uc >> 12);
441 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
442 g_szAll[i++] = 0x80 | (uc & 0x3f);
443 Assert(!((uc >> 12) & ~0xf));
444 uc++;
445 }
446 uc = 0x10000;
447 //RTPrintf(" %#x=%#x", i, uc);
448 while (uc < 0x110000)
449 {
450 g_szAll[i++] = 0xf0 | (uc >> 18);
451 g_szAll[i++] = 0x80 | ((uc >> 12) & 0x3f);
452 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
453 g_szAll[i++] = 0x80 | (uc & 0x3f);
454 Assert(!((uc >> 18) & ~0x7));
455 uc++;
456 }
457 //RTPrintf(" %#x=%#x\n", i, uc);
458 g_szAll[i++] = '\0';
459 Assert(ELEMENTS(g_szAll) == i);
460}
461
462
463void test2(void)
464{
465 RTPrintf("tstUtf8: TEST 2\n");
466
467 /*
468 * Convert to UTF-8 and back.
469 */
470 RTPrintf("tstUtf8: #1: UTF-16 -> UTF-8 -> UTF-16...\n");
471 char *pszUtf8;
472 int rc = RTUtf16ToUtf8(&g_wszAll[0], &pszUtf8);
473 if (rc == VINF_SUCCESS)
474 {
475 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
476 {
477 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-16 -> UTF-8 mismatch!\n");
478 g_cErrors++;
479 }
480
481 PRTUTF16 puszUcs2;
482 rc = RTStrToUtf16(pszUtf8, &puszUcs2);
483 if (rc == VINF_SUCCESS)
484 {
485 if (mymemcmp(puszUcs2, g_wszAll, sizeof(g_wszAll), 16))
486 {
487 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-8 -> UTF-16 failed compare!\n");
488 g_cErrors++;
489 }
490 RTUtf16Free(puszUcs2);
491 }
492 else
493 {
494 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
495 g_cErrors++;
496 }
497 RTStrFree(pszUtf8);
498 }
499 else
500 {
501 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-16 -> UTF-8 failed, rc=%Rrc.\n", rc);
502 g_cErrors++;
503 }
504
505
506 /*
507 * Convert to UTF-16 and back. (just in case the above test fails)
508 */
509 RTPrintf("tstUtf8: #2: UTF-8 -> UTF-16 -> UTF-8...\n");
510 PRTUTF16 puszUcs2;
511 rc = RTStrToUtf16(&g_szAll[0], &puszUcs2);
512 if (rc == VINF_SUCCESS)
513 {
514 if (mymemcmp(puszUcs2, g_wszAll, sizeof(g_wszAll), 16))
515 {
516 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed compare!\n");
517 g_cErrors++;
518 }
519
520 char *pszUtf8;
521 rc = RTUtf16ToUtf8(puszUcs2, &pszUtf8);
522 if (rc == VINF_SUCCESS)
523 {
524 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
525 {
526 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-16 -> UTF-8 failed compare!\n");
527 g_cErrors++;
528 }
529 RTStrFree(pszUtf8);
530 }
531 else
532 {
533 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-16 -> UTF-8 failed, rc=%Rrc.\n", rc);
534 g_cErrors++;
535 }
536 RTStrUcs2Free(puszUcs2);
537 }
538 else
539 {
540 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
541 g_cErrors++;
542 }
543
544 /*
545 * Convert UTF-8 to CPs.
546 */
547 PRTUNICP paCps;
548 rc = RTStrToUni(g_szAll, &paCps);
549 if (rc == VINF_SUCCESS)
550 {
551 if (mymemcmp(paCps, g_uszAll, sizeof(g_uszAll), 32))
552 {
553 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
554 g_cErrors++;
555 }
556
557 size_t cCps;
558 rc = RTStrToUniEx(g_szAll, RTSTR_MAX, &paCps, ELEMENTS(g_uszAll), &cCps);
559 if (rc == VINF_SUCCESS)
560 {
561 if (cCps != ELEMENTS(g_uszAll) - 1)
562 {
563 RTPrintf("tstUtf8: FAILURE - the full #3+: wrong Code Point count %zu, expected %zu\n", cCps, ELEMENTS(g_uszAll) - 1);
564 g_cErrors++;
565 }
566 }
567 else
568 {
569 RTPrintf("tstUtf8: FAILURE - the full #3+: UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
570 g_cErrors++;
571 }
572
573 /** @todo RTCpsToUtf8 or something. */
574 }
575 else
576 {
577 RTPrintf("tstUtf8: FAILURE - the full #3a: UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
578 g_cErrors++;
579 }
580
581 /*
582 * Check the various string lengths.
583 */
584 size_t cuc1 = RTStrCalcUtf16Len(g_szAll);
585 size_t cuc2 = RTUtf16Len(g_wszAll);
586 if (cuc1 != cuc2)
587 {
588 RTPrintf("tstUtf8: FAILURE - cuc1=%zu != cuc2=%zu\n", cuc1, cuc2);
589 g_cErrors++;
590 }
591 //size_t cuc3 = RTUniLen(g_uszAll);
592
593
594 /*
595 * Enumerate the strings.
596 */
597 char *pszPut1Base = (char *)RTMemAlloc(sizeof(g_szAll));
598 AssertRelease(pszPut1Base);
599 char *pszPut1 = pszPut1Base;
600 PRTUTF16 pwszPut2Base = (PRTUTF16)RTMemAlloc(sizeof(g_wszAll));
601 AssertRelease(pwszPut2Base);
602 PRTUTF16 pwszPut2 = pwszPut2Base;
603 const char *psz1 = g_szAll;
604 const char *psz2 = g_szAll;
605 PCRTUTF16 pwsz3 = g_wszAll;
606 PCRTUTF16 pwsz4 = g_wszAll;
607 for (;;)
608 {
609 /*
610 * getters
611 */
612 RTUNICP uc1;
613 rc = RTStrGetCpEx(&psz1, &uc1);
614 if (RT_FAILURE(rc))
615 {
616 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx failed with rc=%Rrc at %.10Rhxs\n", rc, psz2);
617 whereami(8, psz2 - &g_szAll[0]);
618 g_cErrors++;
619 break;
620 }
621 char *pszPrev1 = RTStrPrevCp(g_szAll, psz1);
622 if (pszPrev1 != psz2)
623 {
624 RTPrintf("tstUtf8: FAILURE - RTStrPrevCp returned %p expected %p!\n", pszPrev1, psz2);
625 whereami(8, psz2 - &g_szAll[0]);
626 g_cErrors++;
627 break;
628 }
629 RTUNICP uc2 = RTStrGetCp(psz2);
630 if (uc2 != uc1)
631 {
632 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp\n", uc2, uc1);
633 whereami(8, psz2 - &g_szAll[0]);
634 g_cErrors++;
635 break;
636 }
637 psz2 = RTStrNextCp(psz2);
638 if (psz2 != psz1)
639 {
640 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx and RTStrGetNext returned different next pointer!\n");
641 whereami(8, psz2 - &g_szAll[0]);
642 g_cErrors++;
643 break;
644 }
645
646 RTUNICP uc3;
647 rc = RTUtf16GetCpEx(&pwsz3, &uc3);
648 if (RT_FAILURE(rc))
649 {
650 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx failed with rc=%Rrc at %.10Rhxs\n", rc, pwsz4);
651 whereami(16, pwsz4 - &g_wszAll[0]);
652 g_cErrors++;
653 break;
654 }
655 if (uc3 != uc2)
656 {
657 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp\n", uc3, uc2);
658 whereami(16, pwsz4 - &g_wszAll[0]);
659 g_cErrors++;
660 break;
661 }
662 RTUNICP uc4 = RTUtf16GetCp(pwsz4);
663 if (uc3 != uc4)
664 {
665 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTUtf16GetCp returned different CPs: %RTunicp != %RTunicp\n", uc3, uc4);
666 whereami(16, pwsz4 - &g_wszAll[0]);
667 g_cErrors++;
668 break;
669 }
670 pwsz4 = RTUtf16NextCp(pwsz4);
671 if (pwsz4 != pwsz3)
672 {
673 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTUtf16GetNext returned different next pointer!\n");
674 whereami(8, pwsz4 - &g_wszAll[0]);
675 g_cErrors++;
676 break;
677 }
678
679
680 /*
681 * putters
682 */
683 pszPut1 = RTStrPutCp(pszPut1, uc1);
684 if (pszPut1 - pszPut1Base != psz1 - &g_szAll[0])
685 {
686 RTPrintf("tstUtf8: FAILURE - RTStrPutCp is not at the same offset! %p != %p\n",
687 pszPut1 - pszPut1Base, psz1 - &g_szAll[0]);
688 whereami(8, psz2 - &g_szAll[0]);
689 g_cErrors++;
690 break;
691 }
692
693 pwszPut2 = RTUtf16PutCp(pwszPut2, uc3);
694 if (pwszPut2 - pwszPut2Base != pwsz3 - &g_wszAll[0])
695 {
696 RTPrintf("tstUtf8: FAILURE - RTStrPutCp is not at the same offset! %p != %p\n",
697 pwszPut2 - pwszPut2Base, pwsz3 - &g_wszAll[0]);
698 whereami(8, pwsz4 - &g_wszAll[0]);
699 g_cErrors++;
700 break;
701 }
702
703
704 /* the end? */
705 if (!uc1)
706 break;
707 }
708
709 /* check output if we seems to have made it thru it all. */
710 if (psz2 == &g_szAll[sizeof(g_szAll)])
711 {
712 if (mymemcmp(pszPut1Base, g_szAll, sizeof(g_szAll), 8))
713 {
714 RTPrintf("tstUtf8: FAILURE - RTStrPutCp encoded the string incorrectly.\n");
715 g_cErrors++;
716 }
717 if (mymemcmp(pwszPut2Base, g_wszAll, sizeof(g_wszAll), 16))
718 {
719 RTPrintf("tstUtf8: FAILURE - RTUtf16PutCp encoded the string incorrectly.\n");
720 g_cErrors++;
721 }
722 }
723
724 RTMemFree(pszPut1Base);
725 RTMemFree(pwszPut2Base);
726}
727
728
729/**
730 * Check case insensitivity.
731 */
732void test3(void)
733{
734 RTPrintf("tstUtf8: TEST 3\n");
735
736 if ( RTUniCpToLower('a') != 'a'
737 || RTUniCpToLower('A') != 'a'
738 || RTUniCpToLower('b') != 'b'
739 || RTUniCpToLower('B') != 'b'
740 || RTUniCpToLower('Z') != 'z'
741 || RTUniCpToLower('z') != 'z'
742 || RTUniCpToUpper('c') != 'C'
743 || RTUniCpToUpper('C') != 'C'
744 || RTUniCpToUpper('z') != 'Z'
745 || RTUniCpToUpper('Z') != 'Z')
746 {
747 RTPrintf("tstUtf8: FAILURE - RTUniToUpper/Lower failed basic tests.\n");
748 g_cErrors++;
749 }
750
751 if (RTUtf16ICmp(g_wszAll, g_wszAll))
752 {
753 RTPrintf("tstUtf8: FAILURE - RTUtf16ICmp failed the basic test.\n");
754 g_cErrors++;
755 }
756
757 if (RTUtf16Cmp(g_wszAll, g_wszAll))
758 {
759 RTPrintf("tstUtf8: FAILURE - RTUtf16Cmp failed the basic test.\n");
760 g_cErrors++;
761 }
762
763 static RTUTF16 s_wszTst1a[] = { 'a', 'B', 'c', 'D', 'E', 'f', 'g', 'h', 'i', 'j', 'K', 'L', 'm', 'N', 'o', 'P', 'q', 'r', 'S', 't', 'u', 'V', 'w', 'x', 'Y', 'Z', 0xc5, 0xc6, 0xf8, 0 };
764 static RTUTF16 s_wszTst1b[] = { 'A', 'B', 'c', 'd', 'e', 'F', 'G', 'h', 'i', 'J', 'k', 'l', 'M', 'n', 'O', 'p', 'Q', 'R', 's', 't', 'U', 'v', 'w', 'X', 'y', 'z', 0xe5, 0xe6, 0xd8, 0 };
765 if ( RTUtf16ICmp(s_wszTst1b, s_wszTst1b)
766 || RTUtf16ICmp(s_wszTst1a, s_wszTst1a)
767 || RTUtf16ICmp(s_wszTst1a, s_wszTst1b)
768 || RTUtf16ICmp(s_wszTst1b, s_wszTst1a)
769 )
770 {
771 RTPrintf("tstUtf8: FAILURE - RTUtf16ICmp failed the alphabet test.\n");
772 g_cErrors++;
773 }
774
775 if ( RTUtf16Cmp(s_wszTst1b, s_wszTst1b)
776 || RTUtf16Cmp(s_wszTst1a, s_wszTst1a)
777 || !RTUtf16Cmp(s_wszTst1a, s_wszTst1b)
778 || !RTUtf16Cmp(s_wszTst1b, s_wszTst1a)
779 )
780 {
781 RTPrintf("tstUtf8: FAILURE - RTUtf16Cmp failed the alphabet test.\n");
782 g_cErrors++;
783 }
784}
785
786
787/**
788 * Benchmark stuff.
789 */
790void Benchmarks(void)
791{
792 RTPrintf("tstUtf8: BENCHMARKS\n");
793 static union
794 {
795 RTUTF16 wszBuf[sizeof(g_wszAll)];
796 char szBuf[sizeof(g_szAll)];
797 } s_Buf;
798
799 PRTUTF16 pwsz = &s_Buf.wszBuf[0];
800 int rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, ELEMENTS(s_Buf.wszBuf), NULL);
801 if (RT_SUCCESS(rc))
802 {
803 int i;
804 uint64_t u64Start = RTTimeNanoTS();
805 for (i = 0; i < 100; i++)
806 {
807 rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, ELEMENTS(s_Buf.wszBuf), NULL);
808 if (RT_FAILURE(rc))
809 {
810 RTPrintf("tstUtf8: UTF-8 -> UTF-16 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
811 break;
812 }
813 }
814 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
815 RTPrintf("tstUtf8: UTF-8 -> UTF-16: %d in %RI64ns\n", i, u64Elapsed);
816 }
817
818 char *psz = &s_Buf.szBuf[0];
819 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, ELEMENTS(s_Buf.szBuf), NULL);
820 if (RT_SUCCESS(rc))
821 {
822 int i;
823 uint64_t u64Start = RTTimeNanoTS();
824 for (i = 0; i < 100; i++)
825 {
826 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, ELEMENTS(s_Buf.szBuf), NULL);
827 if (RT_FAILURE(rc))
828 {
829 RTPrintf("tstUtf8: UTF-16 -> UTF-8 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
830 break;
831 }
832 }
833 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
834 RTPrintf("tstUtf8: UTF-16 -> UTF-8: %d in %RI64ns\n", i, u64Elapsed);
835 }
836
837}
838
839
840int main()
841{
842 RTR3Init(false);
843
844 InitStrings();
845 test1();
846 test2();
847 test3();
848 Benchmarks();
849
850 /*
851 * Summary
852 */
853 if (!g_cErrors)
854 RTPrintf("tstUtf8: SUCCESS\n");
855 else
856 RTPrintf("tstUtf8: FAILURE - %d errors!\n", g_cErrors);
857
858 return !!g_cErrors;
859}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette