VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp@ 25837

Last change on this file since 25837 was 24678, checked in by vboxsync, 15 years ago

Misc: Shut up annoying gcc warnings on darwin.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1/* $Id: base64.cpp 24678 2009-11-15 16:07:51Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 */
5
6/*
7 * Copyright (C) 2009 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include <iprt/base64.h>
36#include "internal/iprt.h"
37
38#include <iprt/assert.h>
39#include <iprt/err.h>
40#include <iprt/ctype.h>
41#ifdef RT_STRICT
42# include <iprt/asm.h>
43#endif
44
45
46/*******************************************************************************
47* Defined Constants And Macros *
48*******************************************************************************/
/** Number of Base64 characters emitted per output line by the encoder. */
#define RTBASE64_LINE_LEN   64

/** @name Special g_au8CharToVal values
 * All of these are >= 64, so a plain `< 64` test separates them from real
 * 6-bit values.
 * @{ */
/** Table entry for white space characters. */
#define BASE64_SPACE        0xC0
/** Table entry for the padding character '='. */
#define BASE64_PAD          0xE0
/** Table entry for characters that are not part of the encoding. */
#define BASE64_INVALID      0xFF
/** @} */
58
59
60/*******************************************************************************
61* Global Variables *
62*******************************************************************************/
63/** Base64 character to value. (RFC 2045)
64 * ASSUMES ASCII / UTF-8. */
65static const uint8_t g_au8CharToVal[256] =
66{
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
69 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
70 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
71 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
72 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
73 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
74 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
81 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
82 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
83};
84
/** 6-bit value to Base64 character. (RFC 2045)
 *
 * Index 0..63 yields the encoding character; the 65th element is the string
 * terminator. */
static const char g_szValToChar[64+1] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /*  0..25 */
    "abcdefghijklmnopqrstuvwxyz"    /* 26..51 */
    "0123456789"                    /* 52..61 */
    "+/";                           /* 62..63 */
88
89
90#ifdef RT_STRICT
91/**
92 * Perform table sanity checks on the first call.
93 */
94static void rtBase64Sanity(void)
95{
96 static bool s_fSane = false;
97 if (RT_UNLIKELY(!s_fSane))
98 {
99 for (unsigned i = 0; i < 64; i++)
100 {
101 unsigned ch = g_szValToChar[i];
102 Assert(ch);
103 Assert(g_au8CharToVal[ch] == i);
104 }
105
106 for (unsigned i = 0; i < 256; i++)
107 {
108 uint8_t u8 = g_au8CharToVal[i];
109 Assert( ( u8 == BASE64_INVALID
110 && !RT_C_IS_ALNUM(i)
111 && !RT_C_IS_SPACE(i))
112 || ( u8 == BASE64_PAD
113 && i == '=')
114 || ( u8 == BASE64_SPACE
115 && RT_C_IS_SPACE(i))
116 || ( u8 < 64
117 && (unsigned)g_szValToChar[u8] == i));
118 }
119 ASMAtomicWriteBool(&s_fSane, true);
120 }
121}
122#endif /* RT_STRICT */
123
124
125/**
126 * Calculates the decoded data size for a Base64 encoded string.
127 *
128 * @returns The length in bytes. -1 if the encoding is bad.
129 *
130 * @param pszString The Base64 encoded string.
131 * @param ppszEnd If not NULL, this will point to the first char
132 * following the Base64 encoded text block. If
133 * NULL the entire string is assumed to be Base64.
134 */
135RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
136{
137#ifdef RT_STRICT
138 rtBase64Sanity();
139#endif
140
141 /*
142 * Walk the string until a non-encoded or non-space character is encountered.
143 */
144 uint32_t c6Bits = 0;
145 uint8_t u8 = BASE64_INVALID;
146 unsigned ch;
147 AssertCompile(sizeof(char) == sizeof(uint8_t));
148
149 while ((ch = *pszString))
150 {
151 u8 = g_au8CharToVal[ch];
152 if (u8 < 64)
153 c6Bits++;
154 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
155 break;
156
157 /* advance */
158 pszString++;
159 }
160
161 /*
162 * Padding can only be found at the end and there is
163 * only 1 or 2 padding chars. Deal with it first.
164 */
165 unsigned cbPad = 0;
166 if (u8 == BASE64_PAD)
167 {
168 cbPad = 1;
169 c6Bits++;
170 pszString++;
171 while ((ch = *pszString))
172 {
173 u8 = g_au8CharToVal[ch];
174 if (u8 != BASE64_SPACE)
175 {
176 if (u8 != BASE64_PAD)
177 break;
178 c6Bits++;
179 cbPad++;
180 }
181 pszString++;
182 }
183 if (cbPad >= 3)
184 return -1;
185 }
186
187 /*
188 * Invalid char and no where to indicate where the
189 * Base64 text ends? Return failure.
190 */
191 if ( u8 == BASE64_INVALID
192 && !ppszEnd
193 && ch)
194 return -1;
195
196 /*
197 * Recalc 6-bit to 8-bit and adjust for padding.
198 */
199 size_t cb;
200 if (c6Bits * 3 / 3 == c6Bits)
201 {
202 if ((c6Bits * 3 % 4) != 0)
203 return -1;
204 cb = c6Bits * 3 / 4;
205 }
206 else
207 {
208 if ((c6Bits * (uint64_t)3 % 4) != 0)
209 return -1;
210 cb = c6Bits * (uint64_t)3 / 4;
211 }
212
213 if (cb < cbPad)
214 return -1;
215 cb -= cbPad;
216
217 if (ppszEnd)
218 *ppszEnd = (char *)pszString;
219 return cb;
220}
221RT_EXPORT_SYMBOL(RTBase64DecodedSize);
222
223
224/**
225 * Decodes a Base64 encoded string into the buffer supplied by the caller.
226 *
227 * @returns IPRT status code.
228 * @retval VERR_BUFFER_OVERFLOW if the buffer is too small. pcbActual will not
229 * be set, nor will ppszEnd.
230 * @retval VERR_INVALID_BASE64_ENCODING if the encoding is wrong.
231 *
232 * @param pszString The Base64 string. Whether the entire string or
233 * just the start of the string is in Base64 depends
234 * on wther ppszEnd is specified or not.
235 * @param pvData Where to store the decoded data.
236 * @param cbData The size of the output buffer that pvData points to.
237 * @param pcbActual Where to store the actual number of bytes returned.
238 * Optional.
239 * @param ppszEnd Indicats that the string may contain other stuff
240 * after the Base64 encoded data when not NULL. Will
241 * be set to point to the first char that's not part of
242 * the encoding. If NULL the entire string must be part
243 * of the Base64 encoded data.
244 */
245RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
246{
247#ifdef RT_STRICT
248 rtBase64Sanity();
249#endif
250
251 /*
252 * Process input in groups of 4 input / 3 output chars.
253 */
254 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
255 uint8_t *pbData = (uint8_t *)pvData;
256 uint8_t u8 = BASE64_INVALID;
257 unsigned c6Bits = 0;
258 unsigned ch;
259 AssertCompile(sizeof(char) == sizeof(uint8_t));
260
261 for (;;)
262 {
263 /* The first 6-bit group. */
264 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
265 pszString++;
266 if (u8 >= 64)
267 {
268 c6Bits = 0;
269 break;
270 }
271 u8Trio[0] = u8 << 2;
272 pszString++;
273
274 /* The second 6-bit group. */
275 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
276 pszString++;
277 if (u8 >= 64)
278 {
279 c6Bits = 1;
280 break;
281 }
282 u8Trio[0] |= u8 >> 4;
283 u8Trio[1] = u8 << 4;
284 pszString++;
285
286 /* The third 6-bit group. */
287 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
288 pszString++;
289 if (u8 >= 64)
290 {
291 c6Bits = 2;
292 break;
293 }
294 u8Trio[1] |= u8 >> 2;
295 u8Trio[2] = u8 << 6;
296 pszString++;
297
298 /* The fourth 6-bit group. */
299 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
300 pszString++;
301 if (u8 >= 64)
302 {
303 c6Bits = 3;
304 break;
305 }
306 u8Trio[2] |= u8;
307 pszString++;
308
309 /* flush the trio */
310 if (cbData < 3)
311 return VERR_BUFFER_OVERFLOW;
312 cbData -= 3;
313 pbData[0] = u8Trio[0];
314 pbData[1] = u8Trio[1];
315 pbData[2] = u8Trio[2];
316 pbData += 3;
317 }
318
319 /*
320 * Padding can only be found at the end and there is
321 * only 1 or 2 padding chars. Deal with it first.
322 */
323 unsigned cbPad = 0;
324 if (u8 == BASE64_PAD)
325 {
326 cbPad = 1;
327 pszString++;
328 while ((ch = *pszString))
329 {
330 u8 = g_au8CharToVal[ch];
331 if (u8 != BASE64_SPACE)
332 {
333 if (u8 != BASE64_PAD)
334 break;
335 cbPad++;
336 }
337 pszString++;
338 }
339 if (cbPad >= 3)
340 return VERR_INVALID_BASE64_ENCODING;
341 }
342
343 /*
344 * Invalid char and no where to indicate where the
345 * Base64 text ends? Return failure.
346 */
347 if ( u8 == BASE64_INVALID
348 && !ppszEnd
349 && ch)
350 return VERR_INVALID_BASE64_ENCODING;
351
352 /*
353 * Check padding vs. pending sextets, if anything left to do finish it off.
354 */
355 if (c6Bits || cbPad)
356 {
357 if (c6Bits + cbPad != 4)
358 return VERR_INVALID_BASE64_ENCODING;
359
360 switch (c6Bits)
361 {
362 case 1:
363 u8Trio[1] = u8Trio[2] = 0;
364 break;
365 case 2:
366 u8Trio[2] = 0;
367 break;
368 case 3:
369 default:
370 break;
371 }
372 switch (3 - cbPad)
373 {
374 case 1:
375 if (cbData < 1)
376 return VERR_BUFFER_OVERFLOW;
377 cbData--;
378 pbData[0] = u8Trio[0];
379 pbData++;
380 break;
381
382 case 2:
383 if (cbData < 2)
384 return VERR_BUFFER_OVERFLOW;
385 cbData -= 2;
386 pbData[0] = u8Trio[0];
387 pbData[1] = u8Trio[1];
388 pbData += 2;
389 break;
390
391 default:
392 break;
393 }
394 }
395
396 /*
397 * Set optional return values and return successfully.
398 */
399 if (ppszEnd)
400 *ppszEnd = (char *)pszString;
401 if (pcbActual)
402 *pcbActual = pbData - (uint8_t *)pvData;
403 return VINF_SUCCESS;
404}
405RT_EXPORT_SYMBOL(RTBase64Decode);
406
407
408/**
409 * Calculates the length of the Base64 encoding of a given number of bytes of
410 * data.
411 *
412 * This will assume line breaks every 64 chars. A RTBase64EncodedLengthEx
413 * function can be added if closer control over the output is found to be
414 * required.
415 *
416 * @returns The Base64 string length.
417 * @param cbData The number of bytes to encode.
418 */
419RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
420{
421 if (cbData * 8 / 8 != cbData)
422 {
423 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
424 uint64_t cch = cbData * (uint64_t)8;
425 while (cch % 24)
426 cch += 8;
427 cch /= 6;
428
429 cch += (cch / RTBASE64_LINE_LEN) * RTBASE64_EOL_SIZE;
430 cch -= (cch % RTBASE64_LINE_LEN) == 0;
431 return cch;
432 }
433
434 size_t cch = cbData * 8;
435 while (cch % 24)
436 cch += 8;
437 cch /= 6;
438
439 cch += (cch / RTBASE64_LINE_LEN) * RTBASE64_EOL_SIZE;
440 cch -= (cch % RTBASE64_LINE_LEN) == 0;
441 return cch;
442}
443RT_EXPORT_SYMBOL(RTBase64EncodedLength);
444
445
446/**
447 * Encodes the specifed data into a Base64 string, the caller supplies the
448 * output buffer.
449 *
450 * This will make the same assumptions about line breaks and EOL size as
451 * RTBase64EncodedLength() does. A RTBase64EncodeEx function can be added if
452 * more strict control over the output formatting is found necessary.
453 *
454 * @returns IRPT status code.
455 * @retval VERR_BUFFER_OVERFLOW if the output buffer is too small. The buffer
456 * may contain an invalid Base64 string.
457 *
458 * @param pvData The data to encode.
459 * @param cbData The number of bytes to encode.
460 * @param pszBuf Where to put the Base64 string.
461 * @param cbBuf The size of the output buffer, including the terminator.
462 * @param pcchActual The actual number of characters returned.
463 */
464RTDECL(int) RTBase64Encode(const void *pvData, size_t cbData, char *pszBuf, size_t cbBuf, size_t *pcchActual)
465{
466 /*
467 * Process whole "trios" of input data.
468 */
469 uint8_t u8A;
470 uint8_t u8B;
471 uint8_t u8C;
472 size_t cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
473 const uint8_t *pbSrc = (const uint8_t *)pvData;
474 char *pchDst = pszBuf;
475 while (cbData >= 3)
476 {
477 if (cbBuf < 4 + 1)
478 return VERR_BUFFER_OVERFLOW;
479
480 /* encode */
481 u8A = pbSrc[0];
482 pchDst[0] = g_szValToChar[u8A >> 2];
483 u8B = pbSrc[1];
484 pchDst[1] = g_szValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
485 u8C = pbSrc[2];
486 pchDst[2] = g_szValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
487 pchDst[3] = g_szValToChar[u8C & 0x3f];
488
489 /* advance */
490 cbBuf -= 4;
491 pchDst += 4;
492 cbData -= 3;
493 pbSrc += 3;
494
495 /* deal out linefeeds */
496 if (cbBuf == cbLineFeed && cbData)
497 {
498 if (cbBuf < RTBASE64_EOL_SIZE + 1)
499 return VERR_BUFFER_OVERFLOW;
500 cbBuf -= RTBASE64_EOL_SIZE;
501 if (RTBASE64_EOL_SIZE == 2)
502 *pchDst++ = '\r';
503 *pchDst++ = '\n';
504 cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
505 }
506 }
507
508 /*
509 * Deal with the odd bytes and string termination.
510 */
511 if (cbData)
512 {
513 if (cbBuf < 4 + 1)
514 return VERR_BUFFER_OVERFLOW;
515 switch (cbData)
516 {
517 case 1:
518 u8A = pbSrc[0];
519 pchDst[0] = g_szValToChar[u8A >> 2];
520 pchDst[1] = g_szValToChar[(u8A << 4) & 0x3f];
521 pchDst[2] = '=';
522 pchDst[3] = '=';
523 break;
524 case 2:
525 u8A = pbSrc[0];
526 pchDst[0] = g_szValToChar[u8A >> 2];
527 u8B = pbSrc[1];
528 pchDst[1] = g_szValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
529 pchDst[2] = g_szValToChar[(u8B << 2) & 0x3f];
530 pchDst[3] = '=';
531 break;
532 }
533 pchDst += 4;
534 }
535
536 *pchDst = '\0';
537
538 if (pcchActual)
539 *pcchActual = pchDst - pszBuf;
540 return VINF_SUCCESS;
541}
542RT_EXPORT_SYMBOL(RTBase64Encode);
543
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette