tstUtf8.cpp@ 95685

Last change on this file since 95685 was 95044, checked in by vboxsync, 2 years ago
IPRT/testcase: tstUtf8: Skip the string comparison on older Windows OSes like NT4 in testNoTranslation().
Property svn:eol-style set to `native` Property svn:keywords set to `Id Revision`
File size: 58.5 KB

Line
1	/* $Id: tstUtf8.cpp 95044 2022-05-19 15:43:57Z vboxsync $ */
2	/** @file
3	* IPRT Testcase - UTF-8 and UTF-16 string conversions.
4	*/
5
6	/*
7	* Copyright (C) 2006-2022 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27
28	/*********************************************************************************************************************************
29	* Header Files *
30	*********************************************************************************************************************************/
31	#include <iprt/string.h>
32	#include <iprt/latin1.h>
33	#include <iprt/utf16.h>
34
35	#include <iprt/alloc.h>
36	#include <iprt/assert.h>
37	#include <iprt/env.h>
38	#include <iprt/err.h>
39	#include <iprt/rand.h>
40	#include <iprt/stream.h>
41	#include <iprt/test.h>
42	#include <iprt/time.h>
43	#include <iprt/uni.h>
44	#include <iprt/uuid.h>
45
46	#ifdef RT_OS_WINDOWS
47	# include <iprt/win/windows.h> /* For GetACP(). */
48	#endif
49
50
51	/**
52	* Generate a random codepoint for simple UTF-16 encoding.
53	*/
54	static RTUTF16 GetRandUtf16(void)
55	{
56	RTUTF16 wc;
57	do
58	{
59	wc = (RTUTF16)RTRandU32Ex(1, 0xfffd);
60	} while (wc >= 0xd800 && wc <= 0xdfff);
61	return wc;
62	}
63
64
65	/**
66	*
67	*/
68	static void test1(RTTEST hTest)
69	{
70	static const char s_szBadString1[] = "Bad \xe0\x13\x0";
71	static const char s_szBadString2[] = "Bad \xef\xbf\xc3";
72	int rc;
73	char *pszUtf8;
74	char *pszCurrent;
75	PRTUTF16 pwsz;
76	PRTUTF16 pwszRand;
77
78	/*
79	* Invalid UTF-8 to UCS-2 test.
80	*/
81	RTTestSub(hTest, "Feeding bad UTF-8 to RTStrToUtf16");
82	rc = RTStrToUtf16(s_szBadString1, &pwsz);
83	RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION \|\| rc == VERR_INVALID_UTF8_ENCODING,
84	(hTest, "Conversion of first bad UTF-8 string to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
85	rc = RTStrToUtf16(s_szBadString2, &pwsz);
86	RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION \|\| rc == VERR_INVALID_UTF8_ENCODING,
87	(hTest, "Conversion of second bad UTF-8 strings to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
88
89	/*
90	* Test current CP conversion.
91	*/
92	RTTestSub(hTest, "Rand UTF-16 -> UTF-8 -> CP -> UTF-8");
93	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
94	for (int i = 0; i < 30; i++)
95	pwszRand[i] = GetRandUtf16();
96	pwszRand[30] = 0;
97
98	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
99	if (rc == VINF_SUCCESS)
100	{
101	rc = RTStrUtf8ToCurrentCP(&pszCurrent, pszUtf8);
102	if (rc == VINF_SUCCESS)
103	{
104	RTStrFree(pszUtf8);
105	rc = RTStrCurrentCPToUtf8(&pszUtf8, pszCurrent);
106	if (rc == VINF_SUCCESS)
107	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> Current -> UTF-8 successful.\n");
108	else
109	RTTestFailed(hTest, "%d: The third part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
110	__LINE__, rc);
111	if (RT_SUCCESS(rc))
112	RTStrFree(pszUtf8);
113	RTStrFree(pszCurrent);
114	}
115	else
116	{
117	if (rc == VERR_NO_TRANSLATION)
118	RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VERR_NO_TRANSLATION. This is probably as it should be.\n");
119	else if (rc == VWRN_NO_TRANSLATION)
120	RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VWRN_NO_TRANSLATION. This is probably as it should be.\n");
121	else
122	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
123	__LINE__, rc);
124	if (RT_SUCCESS(rc))
125	RTStrFree(pszCurrent);
126	RTStrFree(pszUtf8);
127	}
128	}
129	else
130	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
131	__LINE__, rc);
132	RTMemFree(pwszRand);
133
134	/*
135	* Generate a new random string.
136	*/
137	RTTestSub(hTest, "Random UTF-16 -> UTF-8 -> UTF-16");
138	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
139	for (int i = 0; i < 30; i++)
140	pwszRand[i] = GetRandUtf16();
141	pwszRand[30] = 0;
142	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
143	if (rc == VINF_SUCCESS)
144	{
145	rc = RTStrToUtf16(pszUtf8, &pwsz);
146	if (rc == VINF_SUCCESS)
147	{
148	int i;
149	for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
150	/* nothing */;
151	if (pwszRand[i] == pwsz[i] && pwsz[i] == 0)
152	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> UTF-16 successful.\n");
153	else
154	{
155	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed.", __LINE__);
156	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
157	}
158	RTUtf16Free(pwsz);
159	}
160	else
161	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
162	__LINE__, rc);
163	RTStrFree(pszUtf8);
164	}
165	else
166	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
167	__LINE__, rc);
168	RTMemFree(pwszRand);
169
170	/*
171	* Generate yet another random string and convert it to a buffer.
172	*/
173	RTTestSub(hTest, "Random RTUtf16ToUtf8Ex + RTStrToUtf16");
174	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
175	for (int i = 0; i < 30; i++)
176	pwszRand[i] = GetRandUtf16();
177	pwszRand[30] = 0;
178
179	char szUtf8Array[120];
180	char *pszUtf8Array = szUtf8Array;
181	rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 120, NULL);
182	if (rc == 0)
183	{
184	rc = RTStrToUtf16(pszUtf8Array, &pwsz);
185	if (rc == 0)
186	{
187	int i;
188	for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
189	;
190	if (pwsz[i] == 0 && i >= 8)
191	RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 -> UTF-16 successful.\n");
192	else
193	{
194	RTTestFailed(hTest, "%d: Incorrect conversion of UTF-16 -> fixed length UTF-8 -> UTF-16.\n", __LINE__);
195	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
196	}
197	RTUtf16Free(pwsz);
198	}
199	else
200	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
201	}
202	else
203	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
204	RTMemFree(pwszRand);
205
206	/*
207	* And again.
208	*/
209	RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
210	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
211	for (int i = 0; i < 30; i++)
212	pwszRand[i] = GetRandUtf16();
213	pwszRand[30] = 0;
214
215	RTUTF16 wszBuf[70];
216	PRTUTF16 pwsz2Buf = wszBuf;
217	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
218	if (rc == 0)
219	{
220	rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 70, NULL);
221	if (rc == 0)
222	{
223	int i;
224	for (i = 0; pwszRand[i] == pwsz2Buf[i] && pwsz2Buf[i] != 0; i++)
225	;
226	if (pwszRand[i] == 0 && pwsz2Buf[i] == 0)
227	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 successful.\n");
228	else
229	{
230	RTTestFailed(hTest, "%d: Incorrect conversion of random UTF-16 -> UTF-8 -> fixed length UTF-16.\n", __LINE__);
231	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz2Buf[i]);
232	}
233	}
234	else
235	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
236	RTStrFree(pszUtf8);
237	}
238	else
239	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
240	__LINE__, rc);
241	RTMemFree(pwszRand);
242
243	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
244	for (int i = 0; i < 30; i++)
245	pwszRand[i] = GetRandUtf16();
246	pwszRand[30] = 0;
247
248	rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 20, NULL);
249	if (rc == VERR_BUFFER_OVERFLOW)
250	RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 with too short buffer successfully rejected.\n");
251	else
252	RTTestFailed(hTest, "%d: Random UTF-16 -> fixed length UTF-8 with too small buffer returned value %d instead of VERR_BUFFER_OVERFLOW.\n",
253	__LINE__, rc);
254	RTMemFree(pwszRand);
255
256	/*
257	* last time...
258	*/
259	RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
260	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
261	for (int i = 0; i < 30; i++)
262	pwszRand[i] = GetRandUtf16();
263	pwszRand[30] = 0;
264
265	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
266	if (rc == VINF_SUCCESS)
267	{
268	rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 20, NULL);
269	if (rc == VERR_BUFFER_OVERFLOW)
270	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer successfully rejected.\n");
271	else
272	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer returned value %Rrc instead of VERR_BUFFER_OVERFLOW.\n",
273	__LINE__, rc);
274	RTStrFree(pszUtf8);
275	}
276	else
277	RTTestFailed(hTest, "%d:The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
278	__LINE__, rc);
279	RTMemFree(pwszRand);
280
281	RTTestSubDone(hTest);
282	}
283
284
/*
 * Buffers filled by InitStrings() with the code points 1..0xd7ff, 0xe000..0xfffd and
 * 0x10000..0x10ffff plus a zero terminator, once per encoding.  The element counts
 * below are the exact number of elements InitStrings() writes (it asserts this).
 */
/** All the code points as plain RTUNICP values. */
static RTUNICP g_uszAll[0x110000 - 1 - 0x800 - 2 + 1];
/** The UTF-16 version: one unit below 0x10000, a surrogate pair (two units) from 0x10000 on. */
static RTUTF16 g_wszAll[0xfffe - (0xe000 - 0xd800) + (0x110000 - 0x10000) * 2];
/** The UTF-8 version: one, two, three and four byte sequences, then the terminator. */
static char g_szAll[0x7f + (0x800 - 0x80) * 2 + (0xfffe - 0x800 - (0xe000 - 0xd800))* 3 + (0x110000 - 0x10000) * 4 + 1];
288
289	static void whereami(int cBits, size_t off)
290	{
291	if (cBits == 8)
292	{
293	if (off < 0x7f)
294	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", off + 1);
295	else if (off < 0xf7f)
296	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x7f) / 2 + 0x80);
297	else if (off < 0x27f7f)
298	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0xf7f) / 3 + 0x800);
299	else if (off < 0x2df79)
300	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x27f7f) / 3 + 0xe000);
301	else if (off < 0x42df79)
302	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x2df79) / 4 + 0x10000);
303	else
304	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 ???\n");
305	}
306	else if (cBits == 16)
307	{
308	if (off < 0xd7ff*2)
309	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", off / 2 + 1);
310	else if (off < 0xf7fd*2)
311	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xd7ff*2) / 2 + 0xe000);
312	else if (off < 0x20f7fd)
313	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xf7fd*2) / 4 + 0x10000);
314	else
315	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 ???\n");
316	}
317	else
318	{
319	if (off < (0xd800 - 1) * sizeof(RTUNICP))
320	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 1);
321	else if (off < (0xfffe - 0x800 - 1) * sizeof(RTUNICP))
322	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1);
323	else
324	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1 + 2);
325	}
326	}
327
328	int mymemcmp(const void pv1, const void pv2, size_t cb, int cBits)
329	{
330	const uint8_t pb1 = (const uint8_t )pv1;
331	const uint8_t pb2 = (const uint8_t )pv2;
332	for (size_t off = 0; off < cb; off++)
333	{
334	if (pb1[off] != pb2[off])
335	{
336	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "mismatch at %#x: ", off);
337	whereami(cBits, off);
338	if (off > 0)
339	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off-1, pb1[off-1], pb2[off-1]);
340	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "*%#x: %02x != %02x!\n", off, pb1[off], pb2[off]);
341	for (size_t i = 1; i < 10; i++)
342	if (off + i < cb)
343	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off+i, pb1[off+i], pb2[off+i]);
344	return 1;
345	}
346	}
347	return 0;
348	}
349
350
351	void InitStrings()
352	{
353	/*
354	* Generate unicode string containing all the legal UTF-16 codepoints, both UTF-16 and UTF-8 version.
355	*/
356	/* the simple code point array first */
357	unsigned i = 0;
358	RTUNICP uc = 1;
359	while (uc < 0xd800)
360	g_uszAll[i++] = uc++;
361	uc = 0xe000;
362	while (uc < 0xfffe)
363	g_uszAll[i++] = uc++;
364	uc = 0x10000;
365	while (uc < 0x110000)
366	g_uszAll[i++] = uc++;
367	g_uszAll[i++] = 0;
368	Assert(RT_ELEMENTS(g_uszAll) == i);
369
370	/* the utf-16 one */
371	i = 0;
372	uc = 1;
373	//RTPrintf("tstUtf8: %#x=%#x", i, uc);
374	while (uc < 0xd800)
375	g_wszAll[i++] = uc++;
376	uc = 0xe000;
377	//RTPrintf(" %#x=%#x", i, uc);
378	while (uc < 0xfffe)
379	g_wszAll[i++] = uc++;
380	uc = 0x10000;
381	//RTPrintf(" %#x=%#x", i, uc);
382	while (uc < 0x110000)
383	{
384	g_wszAll[i++] = 0xd800 \| ((uc - 0x10000) >> 10);
385	g_wszAll[i++] = 0xdc00 \| ((uc - 0x10000) & 0x3ff);
386	uc++;
387	}
388	//RTPrintf(" %#x=%#x\n", i, uc);
389	g_wszAll[i++] = '\0';
390	Assert(RT_ELEMENTS(g_wszAll) == i);
391
392	/*
393	* The utf-8 one
394	*/
395	i = 0;
396	uc = 1;
397	//RTPrintf("tstUtf8: %#x=%#x", i, uc);
398	while (uc < 0x80)
399	g_szAll[i++] = uc++;
400	//RTPrintf(" %#x=%#x", i, uc);
401	while (uc < 0x800)
402	{
403	g_szAll[i++] = 0xc0 \| (uc >> 6);
404	g_szAll[i++] = 0x80 \| (uc & 0x3f);
405	Assert(!((uc >> 6) & ~0x1f));
406	uc++;
407	}
408	//RTPrintf(" %#x=%#x", i, uc);
409	while (uc < 0xd800)
410	{
411	g_szAll[i++] = 0xe0 \| (uc >> 12);
412	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
413	g_szAll[i++] = 0x80 \| (uc & 0x3f);
414	Assert(!((uc >> 12) & ~0xf));
415	uc++;
416	}
417	uc = 0xe000;
418	//RTPrintf(" %#x=%#x", i, uc);
419	while (uc < 0xfffe)
420	{
421	g_szAll[i++] = 0xe0 \| (uc >> 12);
422	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
423	g_szAll[i++] = 0x80 \| (uc & 0x3f);
424	Assert(!((uc >> 12) & ~0xf));
425	uc++;
426	}
427	uc = 0x10000;
428	//RTPrintf(" %#x=%#x", i, uc);
429	while (uc < 0x110000)
430	{
431	g_szAll[i++] = 0xf0 \| (uc >> 18);
432	g_szAll[i++] = 0x80 \| ((uc >> 12) & 0x3f);
433	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
434	g_szAll[i++] = 0x80 \| (uc & 0x3f);
435	Assert(!((uc >> 18) & ~0x7));
436	uc++;
437	}
438	//RTPrintf(" %#x=%#x\n", i, uc);
439	g_szAll[i++] = '\0';
440	Assert(RT_ELEMENTS(g_szAll) == i);
441	}
442
443
444	void test2(RTTEST hTest)
445	{
446	/*
447	* Convert to UTF-8 and back.
448	*/
449	RTTestSub(hTest, "UTF-16 -> UTF-8 -> UTF-16");
450	char *pszUtf8;
451	int rc = RTUtf16ToUtf8(&g_wszAll[0], &pszUtf8);
452	if (rc == VINF_SUCCESS)
453	{
454	pszUtf8[0] = 1;
455	if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
456	RTTestFailed(hTest, "UTF-16 -> UTF-8 mismatch!");
457
458	PRTUTF16 pwszUtf16;
459	rc = RTStrToUtf16(pszUtf8, &pwszUtf16);
460	if (rc == VINF_SUCCESS)
461	{
462	if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
463	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
464	RTUtf16Free(pwszUtf16);
465	}
466	else
467	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
468	RTStrFree(pszUtf8);
469	}
470	else
471	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
472
473
474	/*
475	* Convert to UTF-16 and back. (just in case the above test fails)
476	*/
477	RTTestSub(hTest, "UTF-8 -> UTF-16 -> UTF-8");
478	PRTUTF16 pwszUtf16;
479	rc = RTStrToUtf16(&g_szAll[0], &pwszUtf16);
480	if (rc == VINF_SUCCESS)
481	{
482	if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
483	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
484
485	rc = RTUtf16ToUtf8(pwszUtf16, &pszUtf8);
486	if (rc == VINF_SUCCESS)
487	{
488	if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
489	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed compare!");
490	RTStrFree(pszUtf8);
491	}
492	else
493	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
494	RTUtf16Free(pwszUtf16);
495	}
496	else
497	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
498
499	/*
500	* Convert UTF-8 to CPs.
501	*/
502	RTTestSub(hTest, "UTF-8 -> UNI -> UTF-8");
503	PRTUNICP paCps;
504	rc = RTStrToUni(g_szAll, &paCps);
505	if (rc == VINF_SUCCESS)
506	{
507	if (mymemcmp(paCps, g_uszAll, sizeof(g_uszAll), 32))
508	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
509
510	size_t cCps;
511	rc = RTStrToUniEx(g_szAll, RTSTR_MAX, &paCps, RT_ELEMENTS(g_uszAll), &cCps);
512	if (rc == VINF_SUCCESS)
513	{
514	if (cCps != RT_ELEMENTS(g_uszAll) - 1)
515	RTTestFailed(hTest, "wrong Code Point count %zu, expected %zu\n", cCps, RT_ELEMENTS(g_uszAll) - 1);
516	}
517	else
518	RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
519
520	/** @todo RTCpsToUtf8 or something. */
521	RTUniFree(paCps);
522	}
523	else
524	RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
525
526	/*
527	* Check the various string lengths.
528	*/
529	RTTestSub(hTest, "Lengths");
530	size_t cuc1 = RTStrCalcUtf16Len(g_szAll);
531	size_t cuc2 = RTUtf16Len(g_wszAll);
532	if (cuc1 != cuc2)
533	RTTestFailed(hTest, "cuc1=%zu != cuc2=%zu\n", cuc1, cuc2);
534	//size_t cuc3 = RTUniLen(g_uszAll);
535
536
537	/*
538	* Enumerate the strings.
539	*/
540	RTTestSub(hTest, "Code Point Getters and Putters");
541	char pszPut1Base = (char )RTMemAlloc(sizeof(g_szAll));
542	AssertRelease(pszPut1Base);
543	char *pszPut1 = pszPut1Base;
544	PRTUTF16 pwszPut2Base = (PRTUTF16)RTMemAlloc(sizeof(g_wszAll));
545	AssertRelease(pwszPut2Base);
546	PRTUTF16 pwszPut2 = pwszPut2Base;
547	const char *psz1 = g_szAll;
548	const char *psz2 = g_szAll;
549	PCRTUTF16 pwsz3 = g_wszAll;
550	PCRTUTF16 pwsz4 = g_wszAll;
551	for (;;)
552	{
553	/*
554	* getters
555	*/
556	RTUNICP uc1;
557	rc = RTStrGetCpEx(&psz1, &uc1);
558	if (RT_FAILURE(rc))
559	{
560	RTTestFailed(hTest, "RTStrGetCpEx failed with rc=%Rrc at %.10Rhxs", rc, psz2);
561	whereami(8, psz2 - &g_szAll[0]);
562	break;
563	}
564	char *pszPrev1 = RTStrPrevCp(g_szAll, psz1);
565	if (pszPrev1 != psz2)
566	{
567	RTTestFailed(hTest, "RTStrPrevCp returned %p expected %p!", pszPrev1, psz2);
568	whereami(8, psz2 - &g_szAll[0]);
569	break;
570	}
571	RTUNICP uc2 = RTStrGetCp(psz2);
572	if (uc2 != uc1)
573	{
574	RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc2, uc1);
575	whereami(8, psz2 - &g_szAll[0]);
576	break;
577	}
578	psz2 = RTStrNextCp(psz2);
579	if (psz2 != psz1)
580	{
581	RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetNext returned different next pointer!");
582	whereami(8, psz2 - &g_szAll[0]);
583	break;
584	}
585
586	RTUNICP uc3;
587	rc = RTUtf16GetCpEx(&pwsz3, &uc3);
588	if (RT_FAILURE(rc))
589	{
590	RTTestFailed(hTest, "RTUtf16GetCpEx failed with rc=%Rrc at %.10Rhxs", rc, pwsz4);
591	whereami(16, pwsz4 - &g_wszAll[0]);
592	break;
593	}
594	if (uc3 != uc2)
595	{
596	RTTestFailed(hTest, "RTUtf16GetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc2);
597	whereami(16, pwsz4 - &g_wszAll[0]);
598	break;
599	}
600	RTUNICP uc4 = RTUtf16GetCp(pwsz4);
601	if (uc3 != uc4)
602	{
603	RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc4);
604	whereami(16, pwsz4 - &g_wszAll[0]);
605	break;
606	}
607	pwsz4 = RTUtf16NextCp(pwsz4);
608	if (pwsz4 != pwsz3)
609	{
610	RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetNext returned different next pointer!");
611	whereami(8, pwsz4 - &g_wszAll[0]);
612	break;
613	}
614
615
616	/*
617	* putters
618	*/
619	pszPut1 = RTStrPutCp(pszPut1, uc1);
620	if (pszPut1 - pszPut1Base != psz1 - &g_szAll[0])
621	{
622	RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
623	pszPut1 - pszPut1Base, psz1 - &g_szAll[0]);
624	whereami(8, psz2 - &g_szAll[0]);
625	break;
626	}
627
628	pwszPut2 = RTUtf16PutCp(pwszPut2, uc3);
629	if (pwszPut2 - pwszPut2Base != pwsz3 - &g_wszAll[0])
630	{
631	RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
632	pwszPut2 - pwszPut2Base, pwsz3 - &g_wszAll[0]);
633	whereami(8, pwsz4 - &g_wszAll[0]);
634	break;
635	}
636
637
638	/* the end? */
639	if (!uc1)
640	break;
641	}
642
643	/* check output if we seems to have made it thru it all. */
644	if (psz2 == &g_szAll[sizeof(g_szAll)])
645	{
646	if (mymemcmp(pszPut1Base, g_szAll, sizeof(g_szAll), 8))
647	RTTestFailed(hTest, "RTStrPutCp encoded the string incorrectly.");
648	if (mymemcmp(pwszPut2Base, g_wszAll, sizeof(g_wszAll), 16))
649	RTTestFailed(hTest, "RTUtf16PutCp encoded the string incorrectly.");
650	}
651
652	RTMemFree(pszPut1Base);
653	RTMemFree(pwszPut2Base);
654
655	RTTestSubDone(hTest);
656	}
657
658
659	/**
660	* Check case insensitivity.
661	*/
662	void test3(RTTEST hTest)
663	{
664	RTTestSub(hTest, "Case Sensitivity");
665
666	if ( RTUniCpToLower('a') != 'a'
667	\|\| RTUniCpToLower('A') != 'a'
668	\|\| RTUniCpToLower('b') != 'b'
669	\|\| RTUniCpToLower('B') != 'b'
670	\|\| RTUniCpToLower('Z') != 'z'
671	\|\| RTUniCpToLower('z') != 'z'
672	\|\| RTUniCpToUpper('c') != 'C'
673	\|\| RTUniCpToUpper('C') != 'C'
674	\|\| RTUniCpToUpper('z') != 'Z'
675	\|\| RTUniCpToUpper('Z') != 'Z')
676	RTTestFailed(hTest, "RTUniToUpper/Lower failed basic tests.\n");
677
678	if (RTUtf16ICmp(g_wszAll, g_wszAll))
679	RTTestFailed(hTest, "RTUtf16ICmp failed the basic test.\n");
680
681	if (RTUtf16Cmp(g_wszAll, g_wszAll))
682	RTTestFailed(hTest, "RTUtf16Cmp failed the basic test.\n");
683
684	static RTUTF16 s_wszTst1a[] = { 'a', 'B', 'c', 'D', 'E', 'f', 'g', 'h', 'i', 'j', 'K', 'L', 'm', 'N', 'o', 'P', 'q', 'r', 'S', 't', 'u', 'V', 'w', 'x', 'Y', 'Z', 0xc5, 0xc6, 0xf8, 0 };
685	static RTUTF16 s_wszTst1b[] = { 'A', 'B', 'c', 'd', 'e', 'F', 'G', 'h', 'i', 'J', 'k', 'l', 'M', 'n', 'O', 'p', 'Q', 'R', 's', 't', 'U', 'v', 'w', 'X', 'y', 'z', 0xe5, 0xe6, 0xd8, 0 };
686	if ( RTUtf16ICmp(s_wszTst1b, s_wszTst1b)
687	\|\| RTUtf16ICmp(s_wszTst1a, s_wszTst1a)
688	\|\| RTUtf16ICmp(s_wszTst1a, s_wszTst1b)
689	\|\| RTUtf16ICmp(s_wszTst1b, s_wszTst1a)
690	)
691	RTTestFailed(hTest, "RTUtf16ICmp failed the alphabet test.\n");
692
693	if ( RTUtf16Cmp(s_wszTst1b, s_wszTst1b)
694	\|\| RTUtf16Cmp(s_wszTst1a, s_wszTst1a)
695	\|\| !RTUtf16Cmp(s_wszTst1a, s_wszTst1b)
696	\|\| !RTUtf16Cmp(s_wszTst1b, s_wszTst1a)
697	)
698	RTTestFailed(hTest, "RTUtf16Cmp failed the alphabet test.\n");
699
700	RTTestSubDone(hTest);
701	}
702
703
/**
 * Test the RTStr*Cmp functions.
 *
 * Each check only looks at the sign of the comparison result (or at it being
 * zero), never at its magnitude.
 *
 * @param   hTest   The test handle.
 */
void TstRTStrXCmp(RTTEST hTest)
{
/* Evaluates expr once and reports a failure, quoting the expression and the
   current line number, unless the result satisfies "result op 0". */
#define CHECK_DIFF(expr, op) \
    do \
    { \
        int iDiff = expr; \
        if (!(iDiff op 0)) \
            RTTestFailed(hTest, "%d: %d " #op " 0: %s\n", __LINE__, iDiff, #expr); \
    } while (0)

    /** @todo test the non-ascii bits. */

    /* The checks below expect NULL to sort before the empty string. */
    RTTestSub(hTest, "RTStrCmp");
    CHECK_DIFF(RTStrCmp(NULL, NULL), == );
    CHECK_DIFF(RTStrCmp(NULL, ""), < );
    CHECK_DIFF(RTStrCmp("", NULL), > );
    CHECK_DIFF(RTStrCmp("", ""), == );
    CHECK_DIFF(RTStrCmp("abcdef", "abcdef"), == );
    CHECK_DIFF(RTStrCmp("abcdef", "abcde"), > );
    CHECK_DIFF(RTStrCmp("abcde", "abcdef"), < );
    CHECK_DIFF(RTStrCmp("abcdeg", "abcdef"), > );
    CHECK_DIFF(RTStrCmp("abcdef", "abcdeg"), < );
    CHECK_DIFF(RTStrCmp("abcdeF", "abcdef"), < );
    CHECK_DIFF(RTStrCmp("abcdef", "abcdeF"), > );


    RTTestSub(hTest, "RTStrNCmp");
    CHECK_DIFF(RTStrNCmp(NULL, NULL, RTSTR_MAX), == );
    CHECK_DIFF(RTStrNCmp(NULL, "", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNCmp("", NULL, RTSTR_MAX), > );
    CHECK_DIFF(RTStrNCmp("", "", RTSTR_MAX), == );
    CHECK_DIFF(RTStrNCmp("abcdef", "abcdef", RTSTR_MAX), == );
    CHECK_DIFF(RTStrNCmp("abcdef", "abcde", RTSTR_MAX), > );
    CHECK_DIFF(RTStrNCmp("abcde", "abcdef", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNCmp("abcdeg", "abcdef", RTSTR_MAX), > );
    CHECK_DIFF(RTStrNCmp("abcdef", "abcdeg", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNCmp("abcdeF", "abcdef", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", RTSTR_MAX), > );

    /* Limited lengths: nothing compared, difference just outside and just inside the limit. */
    CHECK_DIFF(RTStrNCmp("abcdef", "fedcba", 0), ==);
    CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 5), ==);
    CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 6), > );


    RTTestSub(hTest, "RTStrICmp");
    CHECK_DIFF(RTStrICmp(NULL, NULL), == );
    CHECK_DIFF(RTStrICmp(NULL, ""), < );
    CHECK_DIFF(RTStrICmp("", NULL), > );
    CHECK_DIFF(RTStrICmp("", ""), == );
    CHECK_DIFF(RTStrICmp("abcdef", "abcdef"), == );
    CHECK_DIFF(RTStrICmp("abcdef", "abcde"), > );
    CHECK_DIFF(RTStrICmp("abcde", "abcdef"), < );
    CHECK_DIFF(RTStrICmp("abcdeg", "abcdef"), > );
    CHECK_DIFF(RTStrICmp("abcdef", "abcdeg"), < );

    /* Strings differing in letter case only are expected to compare equal. */
    CHECK_DIFF(RTStrICmp("abcdeF", "abcdef"), ==);
    CHECK_DIFF(RTStrICmp("abcdef", "abcdeF"), ==);
    CHECK_DIFF(RTStrICmp("ABCDEF", "abcdef"), ==);
    CHECK_DIFF(RTStrICmp("abcdef", "ABCDEF"), ==);
    CHECK_DIFF(RTStrICmp("AbCdEf", "aBcDeF"), ==);
    CHECK_DIFF(RTStrICmp("AbCdEg", "aBcDeF"), > );
    CHECK_DIFF(RTStrICmp("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */


    RTTestSub(hTest, "RTStrICmpAscii");
    CHECK_DIFF(RTStrICmpAscii(NULL, NULL), == );
    CHECK_DIFF(RTStrICmpAscii(NULL, ""), < );
    CHECK_DIFF(RTStrICmpAscii("", NULL), > );
    CHECK_DIFF(RTStrICmpAscii("", ""), == );
    CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdef"), == );
    CHECK_DIFF(RTStrICmpAscii("abcdef", "abcde"), > );
    CHECK_DIFF(RTStrICmpAscii("abcde", "abcdef"), < );
    CHECK_DIFF(RTStrICmpAscii("abcdeg", "abcdef"), > );
    CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeg"), < );

    CHECK_DIFF(RTStrICmpAscii("abcdeF", "abcdef"), ==);
    CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeF"), ==);
    CHECK_DIFF(RTStrICmpAscii("ABCDEF", "abcdef"), ==);
    CHECK_DIFF(RTStrICmpAscii("abcdef", "ABCDEF"), ==);
    CHECK_DIFF(RTStrICmpAscii("AbCdEf", "aBcDeF"), ==);
    CHECK_DIFF(RTStrICmpAscii("AbCdEg", "aBcDeF"), > );
    CHECK_DIFF(RTStrICmpAscii("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */


    RTTestSub(hTest, "RTStrNICmp");
    CHECK_DIFF(RTStrNICmp(NULL, NULL, RTSTR_MAX), == );
    CHECK_DIFF(RTStrNICmp(NULL, "", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNICmp("", NULL, RTSTR_MAX), > );
    CHECK_DIFF(RTStrNICmp("", "", RTSTR_MAX), == );
    /* With a zero length limit even NULL and the empty string are expected to compare equal. */
    CHECK_DIFF(RTStrNICmp(NULL, NULL, 0), == );
    CHECK_DIFF(RTStrNICmp(NULL, "", 0), == );
    CHECK_DIFF(RTStrNICmp("", NULL, 0), == );
    CHECK_DIFF(RTStrNICmp("", "", 0), == );
    CHECK_DIFF(RTStrNICmp("abcdef", "abcdef", RTSTR_MAX), == );
    CHECK_DIFF(RTStrNICmp("abcdef", "abcde", RTSTR_MAX), > );
    CHECK_DIFF(RTStrNICmp("abcde", "abcdef", RTSTR_MAX), < );
    CHECK_DIFF(RTStrNICmp("abcdeg", "abcdef", RTSTR_MAX), > );
    CHECK_DIFF(RTStrNICmp("abcdef", "abcdeg", RTSTR_MAX), < );

    CHECK_DIFF(RTStrNICmp("abcdeF", "abcdef", RTSTR_MAX), ==);
    CHECK_DIFF(RTStrNICmp("abcdef", "abcdeF", RTSTR_MAX), ==);
    CHECK_DIFF(RTStrNICmp("ABCDEF", "abcdef", RTSTR_MAX), ==);
    CHECK_DIFF(RTStrNICmp("abcdef", "ABCDEF", RTSTR_MAX), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", RTSTR_MAX), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", RTSTR_MAX), > );
    CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", RTSTR_MAX), > ); /* diff performed on the lower case cp. */

    /* Limited lengths: differences beyond the limit must not be seen. */
    CHECK_DIFF(RTStrNICmp("ABCDEF", "fedcba", 0), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 5), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", 5), ==);
    CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDe", 5), ==);
    CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDeF", 5), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDe", 5), ==);
    CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 6), > );
    CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", 6), > ); /* diff performed on the lower case cp. */
    /* We should continue using byte comparison when we hit the invalid CP. Will assert in debug builds. */
    // CHECK_DIFF(RTStrNICmp("AbCd\xff""eg", "aBcD\xff""eF", 6), ==);

    RTTestSubDone(hTest);
}
827
828
829
830	/**
831	* Check UTF-8 encoding purging.
832	*/
833	void TstRTStrPurgeEncoding(RTTEST hTest)
834	{
835	RTTestSub(hTest, "RTStrPurgeEncoding");
836
837	/*
838	* Test some good strings.
839	*/
840	char sz1[] = "1234567890wertyuiopsdfghjklzxcvbnm";
841	char sz1Copy[sizeof(sz1)];
842	memcpy(sz1Copy, sz1, sizeof(sz1));
843
844	RTTESTI_CHECK_RETV(RTStrPurgeEncoding(sz1) == 0);
845	RTTESTI_CHECK_RETV(!memcmp(sz1, sz1Copy, sizeof(sz1)));
846
847	char *pszAll = RTStrDup(g_szAll);
848	if (pszAll)
849	{
850	RTTESTI_CHECK(RTStrPurgeEncoding(pszAll) == 0);
851	RTTESTI_CHECK(!memcmp(pszAll, g_szAll, sizeof(g_szAll)));
852	RTStrFree(pszAll);
853	}
854
855	/*
856	* Test some bad stuff.
857	*/
858	struct
859	{
860	size_t cErrors;
861	unsigned char szIn[5];
862	const char *pszExpect;
863	} aTests[] =
864	{
865	{ 0, { '1', '2', '3', '4', '\0' }, "1234" },
866	{ 1, { 0x80, '2', '3', '4', '\0' }, "?234" },
867	{ 1, { '1', 0x80, '3', '4', '\0' }, "1?34" },
868	{ 1, { '1', '2', 0x80, '4', '\0' }, "12?4" },
869	{ 1, { '1', '2', '3', 0x80, '\0' }, "123?" },
870	{ 2, { 0x80, 0x81, '3', '4', '\0' }, "??34" },
871	{ 2, { '1', 0x80, 0x81, '4', '\0' }, "1??4" },
872	{ 2, { '1', '2', 0x80, 0x81, '\0' }, "12??" },
873	};
874	for (size_t i = 0; i < RT_ELEMENTS(aTests); i++)
875	{
876	size_t cErrors = RTStrPurgeEncoding((char *)aTests[i].szIn);
877	if (cErrors != aTests[i].cErrors)
878	RTTestFailed(hTest, "#%u: cErrors=%u expected %u\n", i, cErrors, aTests[i].cErrors);
879	else if (strcmp((char *)aTests[i].szIn, aTests[i].pszExpect))
880	RTTestFailed(hTest, "#%u: %.5Rhxs expected %.5Rhxs (%s)\n", i, aTests[i].szIn, aTests[i].pszExpect, aTests[i].pszExpect);
881	}
882
883	RTTestSubDone(hTest);
884	}
885
886
887	/**
888	* Check string sanitising.
889	*/
890	void TstRTStrPurgeComplementSet(RTTEST hTest)
891	{
892	RTTestSub(hTest, "RTStrPurgeComplementSet");
893	RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
894	'\0' };
895	RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
896	'7', '\0' }; /* Contains an incomplete pair. */
897	struct
898	{
899	const char *pcszIn;
900	const char *pcszOut;
901	PCRTUNICP pcCpSet;
902	char chReplacement;
903	ssize_t cExpected;
904	}
905	aTests[] =
906	{
907	{ "1234werttrew4321", "1234werttrew4321", aCpSet, '_', 0 },
908	{ "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
909	"123_54wert__trew___4321", aCpSet, '_', 3 },
910	{ "hjhj8766", "????????", aCpSet, '?', 8 },
911	{ "123\xf0\xa4\xad\xa2""4", "123____4", aCpSet, '_', 1 },
912	{ "\xff", "\xff", aCpSet, '_', -1 },
913	{ "____", "____", aCpBadSet, '_', -1 }
914	};
915	enum { MAX_IN_STRING = 256 };
916
917	for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
918	{
919	char szCopy[MAX_IN_STRING];
920	ssize_t cReplacements;
921	AssertRC(RTStrCopy(szCopy, RT_ELEMENTS(szCopy), aTests[i].pcszIn));
922	RTTestDisableAssertions(hTest);
923	cReplacements = RTStrPurgeComplementSet(szCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
924	RTTestRestoreAssertions(hTest);
925	if (cReplacements != aTests[i].cExpected)
926	RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
927	(long long) aTests[i].cExpected,
928	(long long) cReplacements);
929	if (strcmp(aTests[i].pcszOut, szCopy))
930	RTTestFailed(hTest, "#%u: expected %s, actual %s\n", i,
931	aTests[i].pcszOut, szCopy);
932	}
933	}
934
935
936	/**
937	* Check string sanitising.
938	*/
939	void TstRTUtf16PurgeComplementSet(RTTEST hTest)
940	{
941	RTTestSub(hTest, "RTUtf16PurgeComplementSet");
942	RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
943	'\0' };
944	RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
945	'7', '\0' }; /* Contains an incomplete pair. */
946	struct
947	{
948	const char *pcszIn;
949	const char *pcszOut;
950	size_t cwc; /* Zero means the strings are Utf-8. */
951	PCRTUNICP pcCpSet;
952	char chReplacement;
953	ssize_t cExpected;
954	}
955	aTests[] =
956	{
957	{ "1234werttrew4321", "1234werttrew4321", 0, aCpSet, '_', 0 },
958	{ "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
959	"123_54wert_trew_4321", 0, aCpSet, '_', 3 },
960	{ "hjhj8766", "????????", 0, aCpSet, '?', 8 },
961	{ "123\xf0\xa4\xad\xa2""4", "123__4", 0, aCpSet, '_', 1 },
962	{ "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
963	{ "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
964	{ "____", "____", 0, aCpBadSet, '_', -1 }
965	};
966	enum { MAX_IN_STRING = 256 };
967
968	for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
969	{
970	RTUTF16 wszInCopy[MAX_IN_STRING], *pwszInCopy = wszInCopy;
971	RTUTF16 wszOutCopy[MAX_IN_STRING], *pwszOutCopy = wszOutCopy;
972	ssize_t cReplacements;
973	if (!aTests[i].cwc)
974	{
975	AssertRC(RTStrToUtf16Ex(aTests[i].pcszIn, RTSTR_MAX, &pwszInCopy,
976	RT_ELEMENTS(wszInCopy), NULL));
977	AssertRC(RTStrToUtf16Ex(aTests[i].pcszOut, RTSTR_MAX, &pwszOutCopy,
978	RT_ELEMENTS(wszOutCopy), NULL));
979	}
980	else
981	{
982	Assert(aTests[i].cwc <= RT_ELEMENTS(wszInCopy));
983	memcpy(wszInCopy, aTests[i].pcszIn, aTests[i].cwc * 2);
984	memcpy(wszOutCopy, aTests[i].pcszOut, aTests[i].cwc * 2);
985	}
986
987	RTTestDisableAssertions(hTest);
988	cReplacements = RTUtf16PurgeComplementSet(wszInCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
989	RTTestRestoreAssertions(hTest);
990
991	if (cReplacements != aTests[i].cExpected)
992	RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
993	(long long) aTests[i].cExpected,
994	(long long) cReplacements);
995	if (RTUtf16Cmp(wszInCopy, wszOutCopy))
996	RTTestFailed(hTest, "#%u: expected %ls, actual %ls\n", i,
997	wszOutCopy, wszInCopy);
998	}
999	}
1000
1001
1002	/**
1003	* Benchmark stuff.
1004	*/
1005	void Benchmarks(RTTEST hTest)
1006	{
1007	static union
1008	{
1009	RTUTF16 wszBuf[sizeof(g_wszAll)];
1010	char szBuf[sizeof(g_szAll)];
1011	} s_Buf;
1012
1013	RTTestSub(hTest, "Benchmarks");
1014	/** @todo add RTTest* methods for reporting benchmark results. */
1015	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTStrToUtf16Ex: "); /** @todo figure this stuff into the test framework. */
1016	PRTUTF16 pwsz = &s_Buf.wszBuf[0];
1017	int rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1018	if (RT_SUCCESS(rc))
1019	{
1020	int i;
1021	uint64_t u64Start = RTTimeNanoTS();
1022	for (i = 0; i < 100; i++)
1023	{
1024	rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1025	if (RT_FAILURE(rc))
1026	{
1027	RTTestFailed(hTest, "UTF-8 -> UTF-16 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1028	break;
1029	}
1030	}
1031	uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1032	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1033	}
1034
1035	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTUtf16ToUtf8Ex: ");
1036	char *psz = &s_Buf.szBuf[0];
1037	rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1038	if (RT_SUCCESS(rc))
1039	{
1040	int i;
1041	uint64_t u64Start = RTTimeNanoTS();
1042	for (i = 0; i < 100; i++)
1043	{
1044	rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1045	if (RT_FAILURE(rc))
1046	{
1047	RTTestFailed(hTest, "UTF-16 -> UTF-8 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1048	break;
1049	}
1050	}
1051	uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1052	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1053	}
1054
1055	RTTestSubDone(hTest);
1056	}
1057
1058
1059	/**
1060	* Tests RTStrEnd
1061	*/
1062	static void testStrEnd(RTTEST hTest)
1063	{
1064	RTTestSub(hTest, "RTStrEnd");
1065
1066	static char const s_szEmpty[1] = "";
1067	RTTESTI_CHECK(RTStrEnd(s_szEmpty, 0) == NULL);
1068	RTTESTI_CHECK(RTStrEnd(s_szEmpty, 1) == &s_szEmpty[0]);
1069	for (size_t i = 0; i < _1M; i++)
1070	RTTESTI_CHECK(RTStrEnd(s_szEmpty, ~i) == &s_szEmpty[0]);
1071
1072	/* Check the implementation won't ever overshoot the '\0' in the input in
1073	anyway that may lead to a SIGSEV. (VC++ 14.1 does this) */
1074	size_t const cchStr = 1023;
1075	char pszStr = (char )RTTestGuardedAllocTail(hTest, cchStr + 1);
1076	memset(pszStr, ' ', cchStr);
1077	char * const pszStrEnd = &pszStr[cchStr];
1078	*pszStrEnd = '\0';
1079	RTTEST_CHECK_RETV(hTest, strlen(pszStr) == cchStr);
1080
1081	for (size_t off = 0; off <= cchStr; off++)
1082	{
1083	RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], cchStr + 1 - off) == pszStrEnd);
1084	RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], RTSTR_MAX) == pszStrEnd);
1085
1086	RTTEST_CHECK(hTest, memchr(&pszStr[off], '\0', cchStr + 1 - off) == pszStrEnd);
1087	RTTEST_CHECK(hTest, strchr(&pszStr[off], '\0') == pszStrEnd);
1088	RTTEST_CHECK(hTest, strchr(&pszStr[off], '?') == NULL);
1089
1090	size_t cchMax = 0;
1091	for (; cchMax <= cchStr - off; cchMax++)
1092	{
1093	const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1094	if (pszRet != NULL)
1095	{
1096	RTTestFailed(hTest, "off=%zu cchMax=%zu: %p, expected NULL\n", off, cchMax, pszRet);
1097	break;
1098	}
1099	}
1100	for (; cchMax <= _8K; cchMax++)
1101	{
1102	const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1103	if (pszRet != pszStrEnd)
1104	{
1105	RTTestFailed(hTest, "off=%zu cchMax=%zu: off by %p\n", off, cchMax, pszRet);
1106	break;
1107	}
1108	}
1109	}
1110	RTTestGuardedFree(hTest, pszStr);
1111	}
1112
1113
1114	/**
1115	* Tests RTStrStr and RTStrIStr.
1116	*/
1117	static void testStrStr(RTTEST hTest)
1118	{
1119	#define CHECK_NULL(expr) \
1120	do { \
1121	const char *pszRet = expr; \
1122	if (pszRet != NULL) \
1123	RTTestFailed(hTest, "%d: %#x -> %s expected NULL", __LINE__, #expr, pszRet); \
1124	} while (0)
1125
1126	#define CHECK(expr, expect) \
1127	do { \
1128	const char *pszRet = expr; \
1129	const char *pszExpect = (expect); \
1130	if ( (pszRet != NULL && pszExpect == NULL) \
1131	\|\| (pszRet == NULL && pszExpect != NULL) \
1132	\|\| strcmp(pszRet, pszExpect) \
1133	) \
1134	RTTestFailed(hTest, "%d: %#x -> %s expected %s", __LINE__, #expr, pszRet, pszExpect); \
1135	} while (0)
1136
1137
1138	RTTestSub(hTest, "RTStrStr");
1139	CHECK(RTStrStr("abcdef", ""), "abcdef");
1140	CHECK_NULL(RTStrStr("abcdef", NULL));
1141	CHECK_NULL(RTStrStr(NULL, ""));
1142	CHECK_NULL(RTStrStr(NULL, NULL));
1143	CHECK(RTStrStr("abcdef", "abcdef"), "abcdef");
1144	CHECK(RTStrStr("abcdef", "b"), "bcdef");
1145	CHECK(RTStrStr("abcdef", "bcdef"), "bcdef");
1146	CHECK(RTStrStr("abcdef", "cdef"), "cdef");
1147	CHECK(RTStrStr("abcdef", "cde"), "cdef");
1148	CHECK(RTStrStr("abcdef", "cd"), "cdef");
1149	CHECK(RTStrStr("abcdef", "c"), "cdef");
1150	CHECK(RTStrStr("abcdef", "f"), "f");
1151	CHECK(RTStrStr("abcdef", "ef"), "ef");
1152	CHECK(RTStrStr("abcdef", "e"), "ef");
1153	CHECK_NULL(RTStrStr("abcdef", "z"));
1154	CHECK_NULL(RTStrStr("abcdef", "A"));
1155	CHECK_NULL(RTStrStr("abcdef", "F"));
1156
1157	RTTestSub(hTest, "RTStrIStr");
1158	CHECK(RTStrIStr("abcdef", ""), "abcdef");
1159	CHECK_NULL(RTStrIStr("abcdef", NULL));
1160	CHECK_NULL(RTStrIStr(NULL, ""));
1161	CHECK_NULL(RTStrIStr(NULL, NULL));
1162	CHECK(RTStrIStr("abcdef", "abcdef"), "abcdef");
1163	CHECK(RTStrIStr("abcdef", "Abcdef"), "abcdef");
1164	CHECK(RTStrIStr("abcdef", "ABcDeF"), "abcdef");
1165	CHECK(RTStrIStr("abcdef", "b"), "bcdef");
1166	CHECK(RTStrIStr("abcdef", "B"), "bcdef");
1167	CHECK(RTStrIStr("abcdef", "bcdef"), "bcdef");
1168	CHECK(RTStrIStr("abcdef", "BCdEf"), "bcdef");
1169	CHECK(RTStrIStr("abcdef", "bCdEf"), "bcdef");
1170	CHECK(RTStrIStr("abcdef", "bcdEf"), "bcdef");
1171	CHECK(RTStrIStr("abcdef", "BcdEf"), "bcdef");
1172	CHECK(RTStrIStr("abcdef", "cdef"), "cdef");
1173	CHECK(RTStrIStr("abcdef", "cde"), "cdef");
1174	CHECK(RTStrIStr("abcdef", "cd"), "cdef");
1175	CHECK(RTStrIStr("abcdef", "c"), "cdef");
1176	CHECK(RTStrIStr("abcdef", "f"), "f");
1177	CHECK(RTStrIStr("abcdeF", "F"), "F");
1178	CHECK(RTStrIStr("abcdef", "F"), "f");
1179	CHECK(RTStrIStr("abcdef", "ef"), "ef");
1180	CHECK(RTStrIStr("EeEef", "e"), "EeEef");
1181	CHECK(RTStrIStr("EeEef", "E"), "EeEef");
1182	CHECK(RTStrIStr("EeEef", "EE"), "EeEef");
1183	CHECK(RTStrIStr("EeEef", "EEE"), "EeEef");
1184	CHECK(RTStrIStr("EeEef", "EEEF"), "eEef");
1185	CHECK_NULL(RTStrIStr("EeEef", "z"));
1186
1187	#undef CHECK
1188	#undef CHECK_NULL
1189	RTTestSubDone(hTest);
1190	}
1191
1192
1193	void testUtf8Latin1(RTTEST hTest)
1194	{
1195	RTTestSub(hTest, "Latin-1 <-> Utf-8 conversion functions");
1196
1197	/* Test Utf8 -> Latin1 */
1198	size_t cch_szAll = 0;
1199	size_t cbShort = RTStrCalcLatin1Len(g_szAll);
1200	RTTEST_CHECK(hTest, cbShort == 0);
1201	int rc = RTStrCalcLatin1LenEx(g_szAll, 383, &cch_szAll);
1202	RTTEST_CHECK(hTest, (cch_szAll == 255));
1203	rc = RTStrCalcLatin1LenEx(g_szAll, RTSTR_MAX, &cch_szAll);
1204	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1205	char *psz = NULL;
1206	char szShort[256] = { 0 };
1207	memcpy(szShort, g_szAll, 255);
1208	cbShort = RTStrCalcLatin1Len(szShort);
1209	RTTEST_CHECK(hTest, cbShort == 191);
1210	rc = RTStrToLatin1(szShort, &psz);
1211	RTTEST_CHECK_RC_OK(hTest, rc);
1212	if (RT_SUCCESS(rc))
1213	{
1214	RTTEST_CHECK(hTest, (strlen(psz) == 191));
1215	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1216	if (psz[i] != (char) j)
1217	{
1218	RTTestFailed(hTest, "conversion of g_szAll to Latin1 failed at position %u\n", i);
1219	break;
1220	}
1221	}
1222	RTStrFree(psz);
1223	rc = RTStrToLatin1(g_szAll, &psz);
1224	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1225	char sz[512];
1226	char *psz2 = &sz[0];
1227	size_t cchActual = 0;
1228	rc = RTStrToLatin1Ex(g_szAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1229	&cchActual);
1230	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1231	RTTEST_CHECK_MSG(hTest, cchActual == 0,
1232	(hTest, "cchActual=%lu\n", cchActual));
1233	rc = RTStrToLatin1Ex(g_szAll, 383, &psz2, sizeof(sz),
1234	&cchActual);
1235	RTTEST_CHECK_RC_OK(hTest, rc);
1236	if (RT_SUCCESS(rc))
1237	{
1238	RTTEST_CHECK(hTest, (cchActual == 255));
1239	RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1240	for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1241	if (psz2[i] != (char) j)
1242	{
1243	RTTestFailed(hTest, "second conversion of g_szAll to Latin1 failed at position %u\n", i);
1244	break;
1245	}
1246	}
1247	rc = RTStrToLatin1Ex(g_szAll, 129, &psz2, 128, &cchActual);
1248	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1249	RTTEST_CHECK_MSG(hTest, cchActual == 128,
1250	(hTest, "cchActual=%lu\n", cchActual));
1251	rc = RTStrToLatin1Ex(g_szAll, 383, &psz, 0, &cchActual);
1252	RTTEST_CHECK_RC_OK(hTest, rc);
1253	if (RT_SUCCESS(rc))
1254	{
1255	RTTEST_CHECK(hTest, (cchActual == 255));
1256	RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1257	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1258	if ( ((j < 0x100) && (psz[i] != (char) j))
1259	\|\| ((j > 0xff) && psz[i] != '?'))
1260	{
1261	RTTestFailed(hTest, "third conversion of g_szAll to Latin1 failed at position %u\n", i);
1262	break;
1263	}
1264	}
1265	const char *pszBad = "Hello\xDC\xD8";
1266	rc = RTStrToLatin1Ex(pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1267	&cchActual);
1268	RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF8_ENCODING);
1269	RTStrFree(psz);
1270
1271	/* Test Latin1 -> Utf8 */
1272	const char *pszLat1 = "\x01\x20\x40\x80\x81";
1273	RTTEST_CHECK(hTest, RTLatin1CalcUtf8Len(pszLat1) == 7);
1274	rc = RTLatin1CalcUtf8LenEx(pszLat1, 3, &cchActual);
1275	RTTEST_CHECK_RC_OK(hTest, rc);
1276	if (RT_SUCCESS(rc))
1277	RTTEST_CHECK(hTest, cchActual == 3);
1278	rc = RTLatin1CalcUtf8LenEx(pszLat1, RTSTR_MAX, &cchActual);
1279	RTTEST_CHECK_RC_OK(hTest, rc);
1280	if (RT_SUCCESS(rc))
1281	RTTEST_CHECK(hTest, cchActual == 7);
1282	char *pch = NULL;
1283	char ch[8];
1284	char *pch2 = &ch[0];
1285	cchActual = 0;
1286	rc = RTLatin1ToUtf8(pszLat1, &pch);
1287	RTTEST_CHECK_RC_OK(hTest, rc);
1288	if (RT_SUCCESS(rc))
1289	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1290	RTStrFree(pch);
1291	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, &cchActual);
1292	RTTEST_CHECK_RC_OK(hTest, rc);
1293	if (RT_SUCCESS(rc))
1294	{
1295	RTTEST_CHECK(hTest, (cchActual == 7));
1296	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1297	}
1298	RTStrFree(pch);
1299	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, NULL);
1300	RTTEST_CHECK_RC_OK(hTest, rc);
1301	if (RT_SUCCESS(rc))
1302	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1303	RTStrFree(pch);
1304	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch),
1305	&cchActual);
1306	RTTEST_CHECK_RC_OK(hTest, rc);
1307	if (RT_SUCCESS(rc))
1308	{
1309	RTTEST_CHECK(hTest, (cchActual == 7));
1310	RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40\xC2\x80\xC2\x81"));
1311	}
1312	rc = RTLatin1ToUtf8Ex(pszLat1, 3, &pch2, RT_ELEMENTS(ch),
1313	&cchActual);
1314	RTTEST_CHECK_RC_OK(hTest, rc);
1315	if (RT_SUCCESS(rc))
1316	{
1317	RTTEST_CHECK(hTest, (cchActual == 3));
1318	RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40"));
1319	}
1320	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch) - 1,
1321	&cchActual);
1322	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1323	RTTEST_CHECK(hTest, (cchActual == 7));
1324	RTTestSubDone(hTest);
1325	}
1326
1327
1328	void testUtf16Latin1(RTTEST hTest)
1329	{
1330	RTTestSub(hTest, "Latin-1 <-> Utf-16 conversion functions");
1331
1332	/* Test Utf16 -> Latin1 */
1333	size_t cch_szAll = 0;
1334	size_t cbShort = RTUtf16CalcLatin1Len(g_wszAll);
1335	RTTEST_CHECK(hTest, cbShort == 0);
1336	int rc = RTUtf16CalcLatin1LenEx(g_wszAll, 255, &cch_szAll);
1337	RTTEST_CHECK(hTest, (cch_szAll == 255));
1338	rc = RTUtf16CalcLatin1LenEx(g_wszAll, RTSTR_MAX, &cch_szAll);
1339	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1340	char *psz = NULL;
1341	RTUTF16 wszShort[256] = { 0 };
1342	for (unsigned i = 0; i < 255; ++i)
1343	wszShort[i] = i + 1;
1344	cbShort = RTUtf16CalcLatin1Len(wszShort);
1345	RTTEST_CHECK(hTest, cbShort == 255);
1346	rc = RTUtf16ToLatin1(wszShort, &psz);
1347	RTTEST_CHECK_RC_OK(hTest, rc);
1348	if (RT_SUCCESS(rc))
1349	{
1350	RTTEST_CHECK(hTest, (strlen(psz) == 255));
1351	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1352	if (psz[i] != (char) j)
1353	{
1354	RTTestFailed(hTest, "conversion of g_wszAll to Latin1 failed at position %u\n", i);
1355	break;
1356	}
1357	}
1358	RTStrFree(psz);
1359	rc = RTUtf16ToLatin1(g_wszAll, &psz);
1360	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1361	char sz[512];
1362	char *psz2 = &sz[0];
1363	size_t cchActual = 0;
1364	rc = RTUtf16ToLatin1Ex(g_wszAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1365	&cchActual);
1366	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1367	RTTEST_CHECK_MSG(hTest, cchActual == 0,
1368	(hTest, "cchActual=%lu\n", cchActual));
1369	rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz2, sizeof(sz),
1370	&cchActual);
1371	RTTEST_CHECK_RC_OK(hTest, rc);
1372	if (RT_SUCCESS(rc))
1373	{
1374	RTTEST_CHECK(hTest, (cchActual == 255));
1375	RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1376	for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1377	if (psz2[i] != (char) j)
1378	{
1379	RTTestFailed(hTest, "second conversion of g_wszAll to Latin1 failed at position %u\n", i);
1380	break;
1381	}
1382	}
1383	rc = RTUtf16ToLatin1Ex(g_wszAll, 128, &psz2, 128, &cchActual);
1384	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1385	RTTEST_CHECK_MSG(hTest, cchActual == 128,
1386	(hTest, "cchActual=%lu\n", cchActual));
1387	rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz, 0, &cchActual);
1388	RTTEST_CHECK_RC_OK(hTest, rc);
1389	if (RT_SUCCESS(rc))
1390	{
1391	RTTEST_CHECK(hTest, (cchActual == 255));
1392	RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1393	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1394	if ( ((j < 0x100) && (psz[i] != (char) j))
1395	\|\| ((j > 0xff) && psz[i] != '?'))
1396	{
1397	RTTestFailed(hTest, "third conversion of g_wszAll to Latin1 failed at position %u\n", i);
1398	break;
1399	}
1400	}
1401	const char *pszBad = "H\0e\0l\0l\0o\0\0\xDC\0\xD8\0";
1402	rc = RTUtf16ToLatin1Ex((RTUTF16 *) pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1403	&cchActual);
1404	RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF16_ENCODING);
1405	RTStrFree(psz);
1406
1407	/* Test Latin1 -> Utf16 */
1408	const char *pszLat1 = "\x01\x20\x40\x80\x81";
1409	RTTEST_CHECK(hTest, RTLatin1CalcUtf16Len(pszLat1) == 5);
1410	rc = RTLatin1CalcUtf16LenEx(pszLat1, 3, &cchActual);
1411	RTTEST_CHECK_RC_OK(hTest, rc);
1412	if (RT_SUCCESS(rc))
1413	RTTEST_CHECK(hTest, cchActual == 3);
1414	rc = RTLatin1CalcUtf16LenEx(pszLat1, RTSTR_MAX, &cchActual);
1415	RTTEST_CHECK_RC_OK(hTest, rc);
1416	if (RT_SUCCESS(rc))
1417	RTTEST_CHECK(hTest, cchActual == 5);
1418	RTUTF16 *pwc = NULL;
1419	RTUTF16 wc[6];
1420	RTUTF16 *pwc2 = &wc[0];
1421	size_t cwActual = 0;
1422	rc = RTLatin1ToUtf16(pszLat1, &pwc);
1423	RTTEST_CHECK_RC_OK(hTest, rc);
1424	if (RT_SUCCESS(rc))
1425	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1426	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1427	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1428	RTUtf16Free(pwc);
1429	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, &cwActual);
1430	RTTEST_CHECK_RC_OK(hTest, rc);
1431	if (RT_SUCCESS(rc))
1432	{
1433	RTTEST_CHECK(hTest, (cwActual == 5));
1434	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1435	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1436	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1437	}
1438	RTUtf16Free(pwc);
1439	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, NULL);
1440	RTTEST_CHECK_RC_OK(hTest, rc);
1441	if (RT_SUCCESS(rc))
1442	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1443	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1444	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1445	RTUtf16Free(pwc);
1446	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc),
1447	&cwActual);
1448	RTTEST_CHECK_RC_OK(hTest, rc);
1449	if (RT_SUCCESS(rc))
1450	{
1451	RTTEST_CHECK(hTest, (cwActual == 5));
1452	RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1453	&& (wc[2] == 0x40) && (wc[3] == 0x80)
1454	&& (wc[4] == 0x81) && (wc[5] == '\0'));
1455	}
1456	rc = RTLatin1ToUtf16Ex(pszLat1, 3, &pwc2, RT_ELEMENTS(wc),
1457	&cwActual);
1458	RTTEST_CHECK_RC_OK(hTest, rc);
1459	if (RT_SUCCESS(rc))
1460	{
1461	RTTEST_CHECK(hTest, (cwActual == 3));
1462	RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1463	&& (wc[2] == 0x40) && (wc[3] == '\0'));
1464	}
1465	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc) - 1,
1466	&cwActual);
1467	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1468	RTTEST_CHECK(hTest, (cwActual == 5));
1469	RTTestSubDone(hTest);
1470	}
1471
1472
1473	static void testNoTranslation(RTTEST hTest)
1474	{
1475	/*
1476	* Try trigger a VERR_NO_TRANSLATION error in convert to
1477	* current CP to latin-1.
1478	*
1479	* On Windows / DOS OSes this is codepage 850.
1480	*
1481	* Note! On Windows-y systems there ALWAYS are two codepages active:
1482	* the OEM codepage for legacy (console) applications, and the ACP (ANSI CodePage).
1483	* 'chcp' only will tell you the OEM codepage, however.
1484	*/
1485
1486	/* Unicode code points (some of it on 2300-23FF -> misc. technical) to try. */
1487	const RTUTF16 s_swzTest1[] = { 0x2358, 0x2242, 0x2357, 0x2359, 0x22f9, 0x2c4e, 0x0030, 0x0060,
1488	0x0092, 0x00c1, 0x00f2, 0x1f80, 0x0088, 0x2c38, 0x2c30, 0x0000 };
1489	char *pszTest1;
1490	int rc = RTUtf16ToUtf8(s_swzTest1, &pszTest1);
1491	RTTESTI_CHECK_RC_RETV(rc, VINF_SUCCESS);
1492
1493	#ifdef RT_OS_WINDOWS
1494	UINT const uACP = GetACP();
1495	RTTestIPrintf(RTTESTLVL_ALWAYS, "Current Windows ANSI codepage is: %u%s\n",
1496	uACP, uACP == 65001 /* UTF-8 */ ? " (UTF-8)" : "");
1497	#endif
1498
1499	RTTestSub(hTest, "VERR_NO_TRANSLATION/RTStrUtf8ToCurrentCP");
1500	char *pszOut;
1501	rc = RTStrUtf8ToCurrentCP(&pszOut, pszTest1);
1502	if (rc == VINF_SUCCESS)
1503	{
1504	RTTestIPrintf(RTTESTLVL_ALWAYS, "CurrentCP is UTF-8 or similar (LC_ALL=%s LANG=%s LC_CTYPE=%s)\n",
1505	RTEnvGet("LC_ALL"), RTEnvGet("LANG"), RTEnvGet("LC_CTYPE"));
1506	#ifdef RT_OS_WINDOWS
1507	if (uACP == 65001 /* UTF-8 */)
1508	{
1509	/* The following string comparison will fail if the active ACP isn't UTF-8 (65001), so skip this then.
1510	* This applies to older Windows OSes like NT4. */
1511	#endif
1512	if (strcmp(pszOut, pszTest1))
1513	RTTestFailed(hTest, "mismatch\nutf8: %.Rhxs\n got: %.Rhxs\n", strlen(pszTest1), pszTest1, strlen(pszOut), pszOut);
1514	#ifdef RT_OS_WINDOWS
1515	}
1516	#endif
1517	RTStrFree(pszOut);
1518	}
1519	else
1520	RTTESTI_CHECK_MSG(rc == VWRN_NO_TRANSLATION \|\| rc == VERR_NO_TRANSLATION, ("rc=%Rrc\n", rc));
1521
1522	RTTestSub(hTest, "VERR_NO_TRANSLATION/RTUtf16ToLatin1");
1523	rc = RTUtf16ToLatin1(s_swzTest1, &pszOut);
1524	RTTESTI_CHECK_RC(rc, VERR_NO_TRANSLATION);
1525	if (RT_SUCCESS(rc))
1526	RTStrFree(pszOut);
1527
1528	RTStrFree(pszTest1);
1529	RTTestSubDone(hTest);
1530	}
1531
1532	static void testGetPut(RTTEST hTest)
1533	{
1534	/*
1535	* Test RTStrPutCp, RTStrGetCp and RTStrGetCpEx.
1536	*/
1537	RTTestSub(hTest, "RTStrPutCp, RTStrGetCp and RTStrGetCpEx");
1538
1539	RTUNICP uc = 0;
1540	while (uc <= 0x10fffd)
1541	{
1542	/* Figure the range - skip illegal ranges. */
1543	RTUNICP ucFirst = uc;
1544	if (ucFirst - UINT32_C(0xd800) <= 0x7ff)
1545	ucFirst = 0xe000;
1546	else if (ucFirst == UINT32_C(0xfffe) \|\| ucFirst == UINT32_C(0xffff))
1547	ucFirst = 0x10000;
1548
1549	RTUNICP ucLast = ucFirst + 1023;
1550	if (ucLast - UINT32_C(0xd800) <= 0x7ff)
1551	ucLast = 0xd7ff;
1552	else if (ucLast == UINT32_C(0xfffe) \|\| ucLast == UINT32_C(0xffff))
1553	ucLast = 0xfffd;
1554
1555	/* Encode the range into a string, decode each code point as we go along. */
1556	char sz1[8192];
1557	char *pszDst = sz1;
1558	for (uc = ucFirst; uc <= ucLast; uc++)
1559	{
1560	char *pszBefore = pszDst;
1561	pszDst = RTStrPutCp(pszDst, uc);
1562	RTTESTI_CHECK(pszBefore - pszDst < 6);
1563
1564	RTUNICP uc2 = RTStrGetCp(pszBefore);
1565	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1566
1567	const char *pszSrc = pszBefore;
1568	RTUNICP uc3 = 42;
1569	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1570	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1571	RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1572	}
1573
1574	/* Decode and re-encode it. */
1575	const char *pszSrc = pszDst = sz1;
1576	for (uc = ucFirst; uc <= ucLast; uc++)
1577	{
1578	RTUNICP uc2 = RTStrGetCp(pszSrc);
1579	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1580
1581	RTUNICP uc3 = 42;
1582	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1583	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1584
1585	pszDst = RTStrPutCp(pszDst, uc);
1586	RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1587	pszSrc = pszDst;
1588	}
1589
1590	/* Decode and wipe it (checking compiler optimizations). */
1591	pszSrc = pszDst = sz1;
1592	for (uc = ucFirst; uc <= ucLast; uc++)
1593	{
1594	RTUNICP uc2 = RTStrGetCp(pszSrc);
1595	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1596
1597	RTUNICP uc3 = 42;
1598	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1599	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1600
1601	pszDst = RTStrPutCp(pszDst, 0);
1602	}
1603
1604	/* advance */
1605	uc = ucLast + 1;
1606	}
1607
1608	}
1609
1610
1611	int main()
1612	{
1613	/*
1614	* Init the runtime, test and say hello.
1615	*/
1616	RTTEST hTest;
1617	RTEXITCODE rcExit = RTTestInitAndCreate("tstUtf8", &hTest);
1618	if (rcExit != RTEXITCODE_SUCCESS)
1619	return rcExit;
1620	RTTestBanner(hTest);
1621
1622	/*
1623	* Run the tests.
1624	*/
1625	InitStrings();
1626	test1(hTest);
1627	test2(hTest);
1628	test3(hTest);
1629	TstRTStrXCmp(hTest);
1630	TstRTStrPurgeEncoding(hTest);
1631	/* TstRTPurgeComplementSet test conditions which assert. /
1632	TstRTStrPurgeComplementSet(hTest);
1633	TstRTUtf16PurgeComplementSet(hTest);
1634	testStrEnd(hTest);
1635	testStrStr(hTest);
1636	testUtf8Latin1(hTest);
1637	testUtf16Latin1(hTest);
1638	testNoTranslation(hTest);
1639	testGetPut(hTest);
1640
1641	Benchmarks(hTest);
1642
1643	/*
1644	* Summary
1645	*/
1646	return RTTestSummaryAndDestroy(hTest);
1647	}
1648

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/testcase/tstUtf8.cpp@ 95685

Download in other formats: