VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64-utf16.cpp

Last change on this file was 106061, checked in by vboxsync, 8 weeks ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1/* $Id: base64-utf16.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2024 Oracle and/or its affiliates.
11 *
12 * This file is part of VirtualBox base platform packages, as
13 * available from https://www.virtualbox.org.
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation, in version 3 of the
18 * License.
19 *
20 * This program is distributed in the hope that it will be useful, but
21 * WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, see <https://www.gnu.org/licenses>.
27 *
28 * The contents of this file may alternatively be used under the terms
29 * of the Common Development and Distribution License Version 1.0
30 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
31 * in the VirtualBox distribution, in which case the provisions of the
32 * CDDL are applicable instead of those of the GPL.
33 *
34 * You may elect to license modified versions of this file under the
35 * terms and conditions of either the GPL or the CDDL or both.
36 *
37 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
38 */
39
40
41/*********************************************************************************************************************************
42* Header Files *
43*********************************************************************************************************************************/
44#include <iprt/base64.h>
45#include "internal/iprt.h"
46
47#include <iprt/assert.h>
48#include <iprt/err.h>
49#include <iprt/string.h>
50#include <iprt/uni.h>
51#ifdef RT_STRICT
52# include <iprt/asm.h>
53#endif
54
55#include "base64.h"
56
57
58/** Translates the given character. */
59DECL_FORCE_INLINE(uint8_t) rtBase64TranslateUtf16(RTUTF16 wc)
60{
61 if (wc < RT_ELEMENTS(g_au8rtBase64CharToVal))
62 return g_au8rtBase64CharToVal[wc];
63 if (RTUniCpIsSpace(wc))
64 return BASE64_SPACE;
65 return BASE64_INVALID;
66}
67
68
69/** Fetched the next character in the string and translates it. */
70DECL_FORCE_INLINE(uint8_t) rtBase64TranslateNextUtf16(PCRTUTF16 pwszString, size_t cwcStringMax)
71{
72 if (cwcStringMax > 0)
73 return rtBase64TranslateUtf16(*pwszString);
74 return BASE64_NULL;
75}
76
77
78/*
79 * Mostly the same as RTBase64DecodedSizeEx, except for the wider character
80 * type and therefore more careful handling of g_szrtBase64ValToChar and additional
81 * space characters. Fixes must be applied to both copies of the code.
82 */
83RTDECL(ssize_t) RTBase64DecodedUtf16SizeEx(PCRTUTF16 pwszString, size_t cwcStringMax, PRTUTF16 *ppwszEnd)
84{
85#ifdef RT_STRICT
86 rtBase64Sanity();
87#endif
88
89 /*
90 * Walk the string until a non-encoded or non-space character is encountered.
91 */
92 uint32_t c6Bits = 0;
93 uint8_t u8;
94
95 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
96 {
97 if (u8 < 64)
98 c6Bits++;
99 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
100 break;
101
102 /* advance */
103 pwszString++;
104 cwcStringMax--;
105 }
106
107 /*
108 * Padding can only be found at the end and there is
109 * only 1 or 2 padding chars. Deal with it first.
110 */
111 unsigned cbPad = 0;
112 if (u8 == BASE64_PAD)
113 {
114 cbPad = 1;
115 c6Bits++;
116 pwszString++;
117 cwcStringMax--;
118 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
119 {
120 if (u8 != BASE64_SPACE)
121 {
122 if (u8 != BASE64_PAD)
123 break;
124 c6Bits++;
125 cbPad++;
126 }
127 pwszString++;
128 cwcStringMax--;
129 }
130 if (cbPad >= 3)
131 return -1;
132 }
133
134 /*
135 * Invalid char and no where to indicate where the
136 * Base64 text ends? Return failure.
137 */
138 if ( u8 == BASE64_INVALID
139 && !ppwszEnd)
140 return -1;
141
142 /*
143 * Recalc 6-bit to 8-bit and adjust for padding.
144 */
145 if (ppwszEnd)
146 *ppwszEnd = (PRTUTF16)pwszString;
147 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
148}
149RT_EXPORT_SYMBOL(RTBase64DecodedUtf16SizeEx);
150
151
152RTDECL(ssize_t) RTBase64DecodedUtf16Size(PCRTUTF16 pwszString, PRTUTF16 *ppwszEnd)
153{
154 return RTBase64DecodedUtf16SizeEx(pwszString, RTSTR_MAX, ppwszEnd);
155}
156RT_EXPORT_SYMBOL(RTBase64DecodedUtf16Size);
157
158
159RTDECL(int) RTBase64DecodeUtf16Ex(PCRTUTF16 pwszString, size_t cwcStringMax, void *pvData, size_t cbData,
160 size_t *pcbActual, PRTUTF16 *ppwszEnd)
161{
162#ifdef RT_STRICT
163 rtBase64Sanity();
164#endif
165
166 /*
167 * Process input in groups of 4 input / 3 output chars.
168 */
169 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
170 uint8_t *pbData = (uint8_t *)pvData;
171 uint8_t u8;
172 unsigned c6Bits = 0;
173
174 for (;;)
175 {
176 /* The first 6-bit group. */
177 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
178 pwszString++, cwcStringMax--;
179 if (u8 >= 64)
180 {
181 c6Bits = 0;
182 break;
183 }
184 u8Trio[0] = u8 << 2;
185 pwszString++;
186 cwcStringMax--;
187
188 /* The second 6-bit group. */
189 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
190 pwszString++, cwcStringMax--;
191 if (u8 >= 64)
192 {
193 c6Bits = 1;
194 break;
195 }
196 u8Trio[0] |= u8 >> 4;
197 u8Trio[1] = u8 << 4;
198 pwszString++;
199 cwcStringMax--;
200
201 /* The third 6-bit group. */
202 u8 = BASE64_INVALID;
203 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
204 pwszString++, cwcStringMax--;
205 if (u8 >= 64)
206 {
207 c6Bits = 2;
208 break;
209 }
210 u8Trio[1] |= u8 >> 2;
211 u8Trio[2] = u8 << 6;
212 pwszString++;
213 cwcStringMax--;
214
215 /* The fourth 6-bit group. */
216 u8 = BASE64_INVALID;
217 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) == BASE64_SPACE)
218 pwszString++, cwcStringMax--;
219 if (u8 >= 64)
220 {
221 c6Bits = 3;
222 break;
223 }
224 u8Trio[2] |= u8;
225 pwszString++;
226 cwcStringMax--;
227
228 /* flush the trio */
229 if (cbData < 3)
230 return VERR_BUFFER_OVERFLOW;
231 cbData -= 3;
232 pbData[0] = u8Trio[0];
233 pbData[1] = u8Trio[1];
234 pbData[2] = u8Trio[2];
235 pbData += 3;
236 }
237
238 /*
239 * Padding can only be found at the end and there is
240 * only 1 or 2 padding chars. Deal with it first.
241 */
242 unsigned cbPad = 0;
243 if (u8 == BASE64_PAD)
244 {
245 cbPad = 1;
246 pwszString++;
247 cwcStringMax--;
248 while ((u8 = rtBase64TranslateNextUtf16(pwszString, cwcStringMax)) != BASE64_NULL)
249 {
250 if (u8 != BASE64_SPACE)
251 {
252 if (u8 != BASE64_PAD)
253 break;
254 cbPad++;
255 }
256 pwszString++;
257 cwcStringMax--;
258 }
259 if (cbPad >= 3)
260 return VERR_INVALID_BASE64_ENCODING;
261 }
262
263 /*
264 * Invalid char and no where to indicate where the
265 * Base64 text ends? Return failure.
266 */
267 if ( u8 == BASE64_INVALID
268 && !ppwszEnd)
269 return VERR_INVALID_BASE64_ENCODING;
270
271 /*
272 * Check padding vs. pending sextets, if anything left to do finish it off.
273 */
274 if (c6Bits || cbPad)
275 {
276 if (c6Bits + cbPad != 4)
277 return VERR_INVALID_BASE64_ENCODING;
278
279 switch (c6Bits)
280 {
281 case 1:
282 u8Trio[1] = u8Trio[2] = 0;
283 break;
284 case 2:
285 u8Trio[2] = 0;
286 break;
287 case 3:
288 default:
289 break;
290 }
291 switch (3 - cbPad)
292 {
293 case 1:
294 if (cbData < 1)
295 return VERR_BUFFER_OVERFLOW;
296 cbData--;
297 pbData[0] = u8Trio[0];
298 pbData++;
299 break;
300
301 case 2:
302 if (cbData < 2)
303 return VERR_BUFFER_OVERFLOW;
304 cbData -= 2;
305 pbData[0] = u8Trio[0];
306 pbData[1] = u8Trio[1];
307 pbData += 2;
308 break;
309
310 default:
311 break;
312 }
313 }
314
315 /*
316 * Set optional return values and return successfully.
317 */
318 if (ppwszEnd)
319 *ppwszEnd = (PRTUTF16)pwszString;
320 if (pcbActual)
321 *pcbActual = pbData - (uint8_t *)pvData;
322 return VINF_SUCCESS;
323}
324RT_EXPORT_SYMBOL(RTBase64DecodeUtf16Ex);
325
326
327RTDECL(int) RTBase64DecodeUtf16(PCRTUTF16 pwszString, void *pvData, size_t cbData, size_t *pcbActual, PRTUTF16 *ppwszEnd)
328{
329 return RTBase64DecodeUtf16Ex(pwszString, RTSTR_MAX, pvData, cbData, pcbActual, ppwszEnd);
330}
331RT_EXPORT_SYMBOL(RTBase64DecodeUtf16);
332
333
334RTDECL(size_t) RTBase64EncodedUtf16Length(size_t cbData)
335{
336 return RTBase64EncodedLengthEx(cbData, 0);
337}
338RT_EXPORT_SYMBOL(RTBase64EncodedUtf16Length);
339
340
341RTDECL(size_t) RTBase64EncodedUtf16LengthEx(size_t cbData, uint32_t fFlags)
342{
343 return RTBase64EncodedLengthEx(cbData, fFlags);
344}
345RT_EXPORT_SYMBOL(RTBase64EncodedUtf16LengthEx);
346
347
348RTDECL(int) RTBase64EncodeUtf16(const void *pvData, size_t cbData, PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
349{
350 return RTBase64EncodeUtf16Ex(pvData, cbData, 0, pwszBuf, cwcBuf, pcwcActual);
351}
352RT_EXPORT_SYMBOL(RTBase64EncodeUtf16);
353
354
355/*
356 * Please note that RTBase64EncodeEx contains an almost exact copy of
357 * this code, just using different output character type and variable prefixes.
358 * So, all fixes must be applied to both versions of the code.
359 */
360RTDECL(int) RTBase64EncodeUtf16Ex(const void *pvData, size_t cbData, uint32_t fFlags,
361 PRTUTF16 pwszBuf, size_t cwcBuf, size_t *pcwcActual)
362{
363 /* Expand the EOL style flags: */
364 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
365 char const chEol0 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
366 char const chEol1 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
367 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
368
369 /*
370 * Process whole "trios" of input data.
371 */
372 uint8_t u8A;
373 uint8_t u8B;
374 uint8_t u8C;
375 size_t cwcLineFeed = cchEol ? cwcBuf - RTBASE64_LINE_LEN : ~(size_t)0;
376 const uint8_t *pbSrc = (const uint8_t *)pvData;
377 PRTUTF16 pwcDst = pwszBuf;
378 while (cbData >= 3)
379 {
380 if (cwcBuf < 4 + 1)
381 return VERR_BUFFER_OVERFLOW;
382
383 /* encode */
384 u8A = pbSrc[0];
385 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
386 u8B = pbSrc[1];
387 pwcDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
388 u8C = pbSrc[2];
389 pwcDst[2] = g_szrtBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
390 pwcDst[3] = g_szrtBase64ValToChar[u8C & 0x3f];
391
392 /* advance */
393 cwcBuf -= 4;
394 pwcDst += 4;
395 cbData -= 3;
396 pbSrc += 3;
397
398 /* deal out end-of-line */
399 if (cwcBuf == cwcLineFeed && cbData && cchEol)
400 {
401 if (cwcBuf < cchEol + 1)
402 return VERR_BUFFER_OVERFLOW;
403 cwcBuf -= cchEol;
404 *pwcDst++ = chEol0;
405 if (chEol1)
406 *pwcDst++ = chEol1;
407 cwcLineFeed = cwcBuf - RTBASE64_LINE_LEN;
408 }
409 }
410
411 /*
412 * Deal with the odd bytes and string termination.
413 */
414 if (cbData)
415 {
416 if (cwcBuf < 4 + 1)
417 return VERR_BUFFER_OVERFLOW;
418 switch (cbData)
419 {
420 case 1:
421 u8A = pbSrc[0];
422 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
423 pwcDst[1] = g_szrtBase64ValToChar[(u8A << 4) & 0x3f];
424 pwcDst[2] = '=';
425 pwcDst[3] = '=';
426 break;
427 case 2:
428 u8A = pbSrc[0];
429 pwcDst[0] = g_szrtBase64ValToChar[u8A >> 2];
430 u8B = pbSrc[1];
431 pwcDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
432 pwcDst[2] = g_szrtBase64ValToChar[(u8B << 2) & 0x3f];
433 pwcDst[3] = '=';
434 break;
435 }
436 pwcDst += 4;
437 }
438
439 *pwcDst = '\0';
440
441 if (pcwcActual)
442 *pcwcActual = pwcDst - pwszBuf;
443 return VINF_SUCCESS;
444}
445RT_EXPORT_SYMBOL(RTBase64EncodeUtf16Ex);
446
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette