VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64-utf16.cpp@ 93103

Last change on this file since 93103 was 84296, checked in by vboxsync, 4 years ago

IPRT/base64: Optimize '\0' handling a little and unify the two versions a little more. Try to address cranky linux build boxes wrt mangling. bugref:9224

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1/* $Id: base64-utf16.cpp 84296 2020-05-13 16:46:27Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2020 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.virtualbox.org. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*********************************************************************************************************************************
32* Header Files *
33*********************************************************************************************************************************/
34#include <iprt/base64.h>
35#include "internal/iprt.h"
36
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/string.h>
40#include <iprt/uni.h>
41#ifdef RT_STRICT
42# include <iprt/asm.h>
43#endif
44
45#include "base64.h"
46
47
48/** Translates the given character. */
49DECL_FORCE_INLINE(uint8_t) rtBase64TranslateUtf16(RTUTF16 wc)
50{
51 if (wc < RT_ELEMENTS(g_au8rtBase64CharToVal))
52 return g_au8rtBase64CharToVal[wc];
53 if (RTUniCpIsSpace(wc))
54 return BASE64_SPACE;
55 return BASE64_INVALID;
56}
57
58
59/** Fetched the next character in the string and translates it. */
60DECL_FORCE_INLINE(uint8_t) rtBase64TranslateNextUtf16(PCRTUTF16 pwszString, size_t cwcStringMax)
61{
62 if (cwcStringMax > 0)
63 return rtBase64TranslateUtf16(*pwszString);
64 return BASE64_NULL;
65}
66
67
68/*
69 * Mostly the same as RTBase64DecodedSizeEx, except for the wider character
70 * type and therefore more careful handling of g_szrtBase64ValToChar and additional
71 * space characters. Fixes must be applied to both copies of the code.
72 */
73RTDECL(ssize_t) RTBase64DecodedUtf16SizeEx(PCRTUTF16 pwszString, size_t cwcStringMax, PRTUTF16 *ppwszEnd)
74{
75#ifdef RT_STRICT
76 rtBase64Sanity();
77#endif
78
79 /*
80 * Walk the string until a non-encoded or non-space character is encountered.
81 */
82 uint32_t c6Bits = 0;
83 uint8_t u8;
84
85 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
86 {
87 if (u8 < 64)
88 c6Bits++;
89 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
90 break;
91
92 /* advance */
93 pwszString++;
94 cwcStringMax--;
95 }
96
97 /*
98 * Padding can only be found at the end and there is
99 * only 1 or 2 padding chars. Deal with it first.
100 */
101 unsigned cbPad = 0;
102 if (u8 == BASE64_PAD)
103 {
104 cbPad = 1;
105 c6Bits++;
106 pwszString++;
107 cwcStringMax--;
108 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
109 {
110 if (u8 != BASE64_SPACE)
111 {
112 if (u8 != BASE64_PAD)
113 break;
114 c6Bits++;
115 cbPad++;
116 }
117 pwszString++;
118 cwcStringMax--;
119 }
120 if (cbPad >= 3)
121 return -1;
122 }
123
124 /*
125 * Invalid char and no where to indicate where the
126 * Base64 text ends? Return failure.
127 */
128 if ( u8 == BASE64_INVALID
129 && !ppwszEnd)
130 return -1;
131
132 /*
133 * Recalc 6-bit to 8-bit and adjust for padding.
134 */
135 if (ppwszEnd)
136 *ppwszEnd = (PRTUTF16)pwszString;
137 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
138}
139RT_EXPORT_SYMBOL(RTBase64DecodedUtf16SizeEx);
140
141
142RTDECL(ssize_t) RTBase64DecodedUtf16Size(PCRTUTF16 pwszString, PRTUTF16 *ppwszEnd)
143{
144 return RTBase64DecodedUtf16SizeEx(pwszString, RTSTR_MAX, ppwszEnd);
145}
146RT_EXPORT_SYMBOL(RTBase64DecodedUtf16Size);
147
148
149RTDECL(int) RTBase64DecodeUtf16Ex(PCRTUTF16 pwszString, size_t cwcStringMax, void *pvData, size_t cbData,
150 size_t *pcbActual, PRTUTF16 *ppwszEnd)
151{
152#ifdef RT_STRICT
153 rtBase64Sanity();
154#endif
155
156 /*
157 * Process input in groups of 4 input / 3 output chars.
158 */
159 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
160 uint8_t *pbData = (uint8_t *)pvData;
161 uint8_t u8;
162 unsigned c6Bits = 0;
163
164 for (;;)
165 {
166 /* The first 6-bit group. */
167 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
168 pwszString++, cwcStringMax--;
169 if (u8 >= 64)
170 {
171 c6Bits = 0;
172 break;
173 }
174 u8Trio[0] = u8 << 2;
175 pwszString++;
176 cwcStringMax--;
177
178 /* The second 6-bit group. */
179 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
180 pwszString++, cwcStringMax--;
181 if (u8 >= 64)
182 {
183 c6Bits = 1;
184 break;
185 }
186 u8Trio[0] |= u8 >> 4;
187 u8Trio[1] = u8 << 4;
188 pwszString++;
189 cwcStringMax--;
190
191 /* The third 6-bit group. */
192 u8 = BASE64_INVALID;
193 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
194 pwszString++, cwcStringMax--;
195 if (u8 >= 64)
196 {
197 c6Bits = 2;
198 break;
199 }
200 u8Trio[1] |= u8 >> 2;
201 u8Trio[2] = u8 << 6;
202 pwszString++;
203 cwcStringMax--;
204
205 /* The fourth 6-bit group. */
206 u8 = BASE64_INVALID;
207 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
208 pwszString++, cwcStringMax--;
209 if (u8 >= 64)
210 {
211 c6Bits = 3;
212 break;
213 }
214 u8Trio[2] |= u8;
215 pwszString++;
216 cwcStringMax--;
217
218 /* flush the trio */
219 if (cbData < 3)
220 return VERR_BUFFER_OVERFLOW;
221 cbData -= 3;
222 pbData[0] = u8Trio[0];
223 pbData[1] = u8Trio[1];
224 pbData[2] = u8Trio[2];
225 pbData += 3;
226 }
227
228 /*
229 * Padding can only be found at the end and there is
230 * only 1 or 2 padding chars. Deal with it first.
231 */
232 unsigned cbPad = 0;
233 if (u8 == BASE64_PAD)
234 {
235 cbPad = 1;
236 pwszString++;
237 cwcStringMax--;
238 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
239 {
240 if (u8 != BASE64_SPACE)
241 {
242 if (u8 != BASE64_PAD)
243 break;
244 cbPad++;
245 }
246 pwszString++;
247 cwcStringMax--;
248 }
249 if (cbPad >= 3)
250 return VERR_INVALID_BASE64_ENCODING;
251 }
252
253 /*
254 * Invalid char and no where to indicate where the
255 * Base64 text ends? Return failure.
256 */
257 if ( u8 == BASE64_INVALID
258 && !ppwszEnd)
259 return VERR_INVALID_BASE64_ENCODING;
260
261 /*
262 * Check padding vs. pending sextets, if anything left to do finish it off.
263 */
264 if (c6Bits || cbPad)
265 {
266 if (c6Bits + cbPad != 4)
267 return VERR_INVALID_BASE64_ENCODING;
268
269 switch (c6Bits)
270 {
271 case 1:
272 u8Trio[1] = u8Trio[2] = 0;
273 break;
274 case 2:
275 u8Trio[2] = 0;
276 break;
277 case 3:
278 default:
279 break;
280 }
281 switch (3 - cbPad)
282 {
283 case 1:
284 if (cbData < 1)
285 return VERR_BUFFER_OVERFLOW;
286 cbData--;
287 pbData[0] = u8Trio[0];
288 pbData++;
289 break;
290
291 case 2:
292 if (cbData < 2)
293 return VERR_BUFFER_OVERFLOW;
294 cbData -= 2;
295 pbData[0] = u8Trio[0];
296 pbData[1] = u8Trio[1];
297 pbData += 2;
298 break;
299
300 default:
301 break;
302 }
303 }
304
305 /*
306 * Set optional return values and return successfully.
307 */
308 if (ppwszEnd)
309 *ppwszEnd = (PRTUTF16)pwszString;
310 if (pcbActual)
311 *pcbActual = pbData - (uint8_t *)pvData;
312 return VINF_SUCCESS;
313}
314RT_EXPORT_SYMBOL(RTBase64DecodeUtf16Ex);
315
316
317RTDECL(int) RTBase64DecodeUtf16(PCRTUTF16 pwszString, void *pvData, size_t cbData, size_t *pcbActual, PRTUTF16 *ppwszEnd)
318{
319 return RTBase64DecodeUtf16Ex(pwszString, RTSTR_MAX, pvData, cbData, pcbActual, ppwszEnd);
320}
321RT_EXPORT_SYMBOL(RTBase64DecodeUtf16);
322
323
324RTDECL(size_t) RTBase64EncodedUtf16Length(size_t cbData)
325{
326 return RTBase64EncodedLengthEx(cbData, 0);
327}
328RT_EXPORT_SYMBOL(RTBase64EncodedUtf16Length);
329
330
331RTDECL(size_t) RTBase64EncodedUtf16LengthEx(size_t cbData, uint32_t fFlags)
332{
333 return RTBase64EncodedLengthEx(cbData, fFlags);
334}
335RT_EXPORT_SYMBOL(RTBase64EncodedUtf16LengthEx);
336
337
338RTDECL(int) RTBase64EncodeUtf16(const void *pvData, size_t cbData, PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
339{
340 return RTBase64EncodeUtf16Ex(pvData, cbData, 0, pwszBuf, cwcBuf, pcwcActual);
341}
342RT_EXPORT_SYMBOL(RTBase64EncodeUtf16);
343
344
345/*
346 * Please note that RTBase64EncodeEx contains an almost exact copy of
347 * this code, just using different output character type and variable prefixes.
348 * So, all fixes must be applied to both versions of the code.
349 */
350RTDECL(int) RTBase64EncodeUtf16Ex(const void *pvData, size_t cbData, uint32_t fFlags,
351 PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
352{
353 /* Expand the EOL style flags: */
354 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
355 char const chEol0 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
356 char const chEol1 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
357 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
358
359 /*
360 * Process whole "trios" of input data.
361 */
362 uint8_t u8A;
363 uint8_t u8B;
364 uint8_t u8C;
365 size_t cwcLineFeed = cchEol ? cwcBuf - RTBASE64_LINE_LEN : ~(size_t)0;
366 const uint8_t *pbSrc = (const uint8_t *)pvData;
367 PRTUTF16 pwcDst = pwszBuf;
368 while (cbData >= 3)
369 {
370 if (cwcBuf < 4 + 1)
371 return VERR_BUFFER_OVERFLOW;
372
373 /* encode */
374 u8A = pbSrc[0];
375 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
376 u8B = pbSrc[1];
377 pwcDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
378 u8C = pbSrc[2];
379 pwcDst[2] = g_szrtBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
380 pwcDst[3] = g_szrtBase64ValToChar[u8C & 0x3f];
381
382 /* advance */
383 cwcBuf -= 4;
384 pwcDst += 4;
385 cbData -= 3;
386 pbSrc += 3;
387
388 /* deal out end-of-line */
389 if (cwcBuf == cwcLineFeed && cbData && cchEol)
390 {
391 if (cwcBuf < cchEol + 1)
392 return VERR_BUFFER_OVERFLOW;
393 cwcBuf -= cchEol;
394 *pwcDst++ = chEol0;
395 if (chEol1)
396 *pwcDst++ = chEol1;
397 cwcLineFeed = cwcBuf - RTBASE64_LINE_LEN;
398 }
399 }
400
401 /*
402 * Deal with the odd bytes and string termination.
403 */
404 if (cbData)
405 {
406 if (cwcBuf < 4 + 1)
407 return VERR_BUFFER_OVERFLOW;
408 switch (cbData)
409 {
410 case 1:
411 u8A = pbSrc[0];
412 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
413 pwcDst[1] = g_szrtBase64ValToChar[(u8A << 4) & 0x3f];
414 pwcDst[2] = '=';
415 pwcDst[3] = '=';
416 break;
417 case 2:
418 u8A = pbSrc[0];
419 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
420 u8B = pbSrc[1];
421 pwcDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
422 pwcDst[2] = g_szrtBase64ValToChar[(u8B << 2) & 0x3f];
423 pwcDst[3] = '=';
424 break;
425 }
426 pwcDst += 4;
427 }
428
429 *pwcDst = '\0';
430
431 if (pcwcActual)
432 *pcwcActual = pwcDst - pwszBuf;
433 return VINF_SUCCESS;
434}
435RT_EXPORT_SYMBOL(RTBase64EncodeUtf16Ex);
436
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette