VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 53624

Last change on this file since 53624 was 50508, checked in by vboxsync, 11 years ago

DnD: Update.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/* $Id: uri.cpp 50508 2014-02-19 15:45:58Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/uri.h>
32
33#include <iprt/string.h>
34#include <iprt/mem.h>
35#include <iprt/path.h>
36#include <iprt/stream.h>
37
/* General URI format:

     foo://example.com:8042/over/there?name=ferret#nose
     \_/   \______________/\_________/ \_________/ \__/
      |           |            |            |        |
   scheme     authority       path        query   fragment
      |   _____________________|__
     / \ /                        \
     urn:example:animal:ferret:nose
*/
48
49
50/*******************************************************************************
51* Private RTUri helper *
52*******************************************************************************/
53
/* The following defines characters which have to be % escaped:
   control = 00-1F
   space   = ' '
   delims  = '<' , '>' , '#' , '%' , '"'
   unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'

   The whole expansion is wrapped in parentheses so that the macro behaves
   like a single boolean expression when it is negated or combined with
   other operators. Note that the argument is evaluated several times, so
   it must not have side effects.
*/
#define URI_EXCLUDED(a) \
    (   ((a) >= 0x0  && (a) <= 0x20) \
     || ((a) >= 0x5B && (a) <= 0x5E) \
     || ((a) >= 0x7B && (a) <= 0x7D) \
     || (a) == '<' || (a) == '>' || (a) == '#' \
     || (a) == '%' || (a) == '"' || (a) == '`')
66
67static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
68{
69 if (!pszString)
70 return NULL;
71
72 int rc = VINF_SUCCESS;
73
74 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
75 /* The new string can be max 3 times in size of the original string. */
76 char *pszNew = (char*)RTMemAlloc(cbLen * 3 + 1);
77 if (!pszNew)
78 return NULL;
79 char *pszRes = NULL;
80 size_t iIn = 0;
81 size_t iOut = 0;
82 while(iIn < cbLen)
83 {
84 if (URI_EXCLUDED(pszString[iIn]))
85 {
86 char szNum[3] = { 0, 0, 0 };
87 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
88 pszNew[iOut++] = '%';
89 pszNew[iOut++] = szNum[0];
90 pszNew[iOut++] = szNum[1];
91 }
92 else
93 pszNew[iOut++] = pszString[iIn++];
94 }
95 if (RT_SUCCESS(rc))
96 {
97 pszNew[iOut] = '\0';
98 if (iOut != iIn)
99 {
100 /* If the source and target strings have different size, recreate
101 * the target string with the correct size. */
102 pszRes = RTStrDupN(pszNew, iOut);
103 RTStrFree(pszNew);
104 }
105 else
106 pszRes = pszNew;
107 }
108 else
109 RTStrFree(pszNew);
110
111 return pszRes;
112}
113
114static char *rtUriPercentDecodeN(const char *pszString, size_t cchMax)
115{
116 if (!pszString)
117 return NULL;
118
119 int rc = VINF_SUCCESS;
120 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
121 /* The new string can only get smaller. */
122 char *pszNew = (char*)RTMemAlloc(cbLen + 1);
123 if (!pszNew)
124 return NULL;
125 char *pszRes = NULL;
126 size_t iIn = 0;
127 size_t iOut = 0;
128 while(iIn < cbLen)
129 {
130 if (pszString[iIn] == '%')
131 {
132 /* % encoding means the percent sign and exactly 2 hexadecimal
133 * digits describing the ASCII number of the character. */
134 ++iIn;
135 char szNum[3];
136 szNum[0] = pszString[iIn++];
137 szNum[1] = pszString[iIn++];
138 szNum[2] = '\0';
139
140 uint8_t u8;
141 rc = RTStrToUInt8Ex(szNum, NULL, 16, &u8);
142 if (RT_FAILURE(rc))
143 break;
144 pszNew[iOut] = u8;
145 }
146 else
147 pszNew[iOut] = pszString[iIn++];
148 ++iOut;
149 }
150 if (RT_SUCCESS(rc))
151 {
152 pszNew[iOut] = '\0';
153 if (iOut != iIn)
154 {
155 /* If the source and target strings have different size, recreate
156 * the target string with the correct size. */
157 pszRes = RTStrDupN(pszNew, iOut);
158 RTStrFree(pszNew);
159 }
160 else
161 pszRes = pszNew;
162 }
163 else
164 RTStrFree(pszNew);
165
166 return pszRes;
167}
168
/**
 * Locates the ':' which terminates the scheme.
 *
 * @returns true and the index of the ':' in @a piEnd if one is found within
 *          @a cbLen characters starting at @a iStart, false (leaving
 *          @a piEnd untouched) otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index of the first character to inspect.
 * @param   cbLen   Number of characters to inspect.
 * @param   piEnd   Where to store the index of the ':'.
 */
static bool rtUriFindSchemeEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const iStop = iStart + cbLen;
    for (size_t iCur = iStart; iCur < iStop; iCur++)
    {
        if (pszUri[iCur] != ':')
            continue;
        *piEnd = iCur;
        return true;
    }
    return false;
}
184
/**
 * Checks whether an authority component starts at the given position.
 *
 * An authority is introduced by "//".
 *
 * @returns true and the index of the first character behind the "//" in
 *          @a piStart if present, false (leaving @a piStart untouched)
 *          otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index at which the "//" is expected.
 * @param   cbLen   Number of characters available from @a iStart on.
 * @param   piStart Where to store the start index of the authority.
 */
static bool rtUriCheckAuthorityStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    /* Too short to hold the two slashes. */
    if (cbLen < 2)
        return false;

    if (   pszUri[iStart]     != '/'
        || pszUri[iStart + 1] != '/')
        return false;

    *piStart = iStart + 2;
    return true;
}
198
/**
 * Locates the character which terminates the authority component.
 *
 * The authority ends at the first '/', '?' or '#'.
 *
 * @returns true and the index of the terminating character in @a piEnd if
 *          one is found within @a cbLen characters starting at @a iStart,
 *          false (leaving @a piEnd untouched) otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index of the first character of the authority.
 * @param   cbLen   Number of characters to inspect.
 * @param   piEnd   Where to store the index of the terminating character.
 */
static bool rtUriFindAuthorityEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const iStop = iStart + cbLen;
    for (size_t iCur = iStart; iCur < iStop; iCur++)
    {
        switch (pszUri[iCur])
        {
            case '/':
            case '?':
            case '#':
                *piEnd = iCur;
                return true;
            default:
                break;
        }
    }
    return false;
}
216
/**
 * Checks whether a path component starts at the given position.
 *
 * Anything but a '?' or a '#' is accepted as the first character of a path,
 * and so is an empty remainder (@a cbLen of zero).
 *
 * @returns true and @a iStart in @a piStart (a leading '/' is part of the
 *          path) if a path starts here, false (leaving @a piStart untouched)
 *          if the position holds a '?' or a '#'.
 * @param   pszUri  The URI string.
 * @param   iStart  Index at which the path is expected.
 * @param   cbLen   Number of characters available from @a iStart on.
 * @param   piStart Where to store the start index of the path.
 */
static bool rtUriCheckPathStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    if (cbLen >= 1)
    {
        /* A query or fragment marker means there is no path at all. */
        char const chFirst = pszUri[iStart];
        if (chFirst == '?' || chFirst == '#')
            return false;
    }

    /* A '/' as well as every other character starts a path. */
    *piStart = iStart;
    return true;
}
235
/**
 * Locates the character which terminates the path component.
 *
 * The path ends at the first '?' or '#'.
 *
 * @returns true and the index of the terminating character in @a piEnd if
 *          one is found within @a cbLen characters starting at @a iStart,
 *          false (leaving @a piEnd untouched) otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index of the first character of the path.
 * @param   cbLen   Number of characters to inspect.
 * @param   piEnd   Where to store the index of the terminating character.
 */
static bool rtUriFindPathEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const iStop = iStart + cbLen;
    for (size_t iCur = iStart; iCur < iStop; iCur++)
    {
        char const ch = pszUri[iCur];
        if (ch == '?' || ch == '#')
        {
            *piEnd = iCur;
            return true;
        }
    }
    return false;
}
252
/**
 * Checks whether a query component starts at the given position.
 *
 * A query is introduced by a '?'.
 *
 * @returns true and the index of the first character behind the '?' in
 *          @a piStart if present, false (leaving @a piStart untouched)
 *          otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index at which the '?' is expected.
 * @param   cbLen   Number of characters available from @a iStart on.
 * @param   piStart Where to store the start index of the query.
 */
static bool rtUriCheckQueryStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    if (cbLen < 1)
        return false;
    if (pszUri[iStart] != '?')
        return false;

    /* The '?' itself does not belong to the query. */
    *piStart = iStart + 1;
    return true;
}
264
/**
 * Locates the character which terminates the query component.
 *
 * The query ends at the first '#'.
 *
 * @returns true and the index of the '#' in @a piEnd if one is found within
 *          @a cbLen characters starting at @a iStart, false (leaving
 *          @a piEnd untouched) otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index of the first character of the query.
 * @param   cbLen   Number of characters to inspect.
 * @param   piEnd   Where to store the index of the '#'.
 */
static bool rtUriFindQueryEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const iStop = iStart + cbLen;
    for (size_t iCur = iStart; iCur < iStop; iCur++)
    {
        if (pszUri[iCur] != '#')
            continue;
        *piEnd = iCur;
        return true;
    }
    return false;
}
280
/**
 * Checks whether a fragment component starts at the given position.
 *
 * A fragment is introduced by a '#'.
 *
 * @returns true and the index of the first character behind the '#' in
 *          @a piStart if present, false (leaving @a piStart untouched)
 *          otherwise.
 * @param   pszUri  The URI string.
 * @param   iStart  Index at which the '#' is expected.
 * @param   cbLen   Number of characters available from @a iStart on.
 * @param   piStart Where to store the start index of the fragment.
 */
static bool rtUriCheckFragmentStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    if (cbLen < 1)
        return false;
    if (pszUri[iStart] != '#')
        return false;

    /* The '#' itself does not belong to the fragment. */
    *piStart = iStart + 1;
    return true;
}
292
293/*******************************************************************************
294* Public RTUri interface *
295*******************************************************************************/
296
297/*******************************************************************************
298* Generic Uri methods *
299*******************************************************************************/
300
301RTR3DECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery, const char *pszFragment)
302{
303 if (!pszScheme) /* Scheme is minimum requirement */
304 return NULL;
305
306 char *pszResult = 0;
307 char *pszAuthority1 = 0;
308 char *pszPath1 = 0;
309 char *pszQuery1 = 0;
310 char *pszFragment1 = 0;
311
312 do
313 {
314 /* Create the percent encoded strings and calculate the necessary uri
315 * length. */
316 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
317 if (pszAuthority)
318 {
319 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
320 if (!pszAuthority1)
321 break;
322 cbSize += strlen(pszAuthority1) + 2;
323 }
324 if (pszPath)
325 {
326 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
327 if (!pszPath1)
328 break;
329 cbSize += strlen(pszPath1);
330 }
331 if (pszQuery)
332 {
333 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
334 if (!pszQuery1)
335 break;
336 cbSize += strlen(pszQuery1) + 1;
337 }
338 if (pszFragment)
339 {
340 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
341 if (!pszFragment1)
342 break;
343 cbSize += strlen(pszFragment1) + 1;
344 }
345
346 char *pszTmp = pszResult = (char*)RTMemAllocZ(cbSize);
347 if (!pszResult)
348 break;
349 /* Compose the target uri string. */
350 RTStrCatP(&pszTmp, &cbSize, pszScheme);
351 RTStrCatP(&pszTmp, &cbSize, ":");
352 if (pszAuthority1)
353 {
354 RTStrCatP(&pszTmp, &cbSize, "//");
355 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
356 }
357 if (pszPath1)
358 {
359 RTStrCatP(&pszTmp, &cbSize, pszPath1);
360 }
361 if (pszQuery1)
362 {
363 RTStrCatP(&pszTmp, &cbSize, "?");
364 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
365 }
366 if (pszFragment1)
367 {
368 RTStrCatP(&pszTmp, &cbSize, "#");
369 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
370 }
371 }while (0);
372
373 /* Cleanup */
374 if (pszAuthority1)
375 RTStrFree(pszAuthority1);
376 if (pszPath1)
377 RTStrFree(pszPath1);
378 if (pszQuery1)
379 RTStrFree(pszQuery1);
380 if (pszFragment1)
381 RTStrFree(pszFragment1);
382
383 return pszResult;
384}
385
386RTR3DECL(bool) RTUriHasScheme(const char *pszUri, const char *pszScheme)
387{
388 bool fRes = false;
389 char *pszTmp = RTUriScheme(pszUri);
390 if (pszTmp)
391 {
392 fRes = RTStrNICmp(pszScheme, pszTmp, strlen(pszTmp)) == 0;
393 RTStrFree(pszTmp);
394 }
395 return fRes;
396}
397
398RTR3DECL(char *) RTUriScheme(const char *pszUri)
399{
400 AssertPtrReturn(pszUri, NULL);
401
402 size_t iPos1;
403 size_t cbLen = strlen(pszUri);
404 if (rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
405 return rtUriPercentDecodeN(pszUri, iPos1);
406 return NULL;
407}
408
409RTR3DECL(char *) RTUriAuthority(const char *pszUri)
410{
411 AssertPtrReturn(pszUri, NULL);
412
413 size_t iPos1;
414 size_t cbLen = strlen(pszUri);
415 /* Find the end of the scheme. */
416 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
417 return NULL; /* no URI */
418 else
419 ++iPos1; /* Skip ':' */
420
421 size_t iPos2;
422 /* Find the start of the authority. */
423 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
424 {
425 size_t iPos3 = cbLen;
426 /* Find the end of the authority. If not found, the rest of the string
427 * is used. */
428 rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3);
429 if (iPos3 > iPos2) /* Length check */
430 return rtUriPercentDecodeN(&pszUri[iPos2], iPos3 - iPos2);
431 else
432 return NULL;
433 }
434 return NULL;
435}
436
437RTR3DECL(char *) RTUriPath(const char *pszUri)
438{
439 AssertPtrReturn(pszUri, NULL);
440
441 size_t iPos1;
442 size_t cbLen = strlen(pszUri);
443 /* Find the end of the scheme. */
444 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
445 return NULL; /* no URI */
446 else
447 ++iPos1; /* Skip ':' */
448
449 size_t iPos2;
450 size_t iPos3 = iPos1; /* Skip if no authority is found */
451 /* Find the start of the authority. */
452 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
453 {
454 /* Find the end of the authority. If not found, then there is no path
455 * component, cause the authority is the rest of the string. */
456 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
457 return NULL; /* no path! */
458 }
459
460 size_t iPos4;
461 /* Find the start of the path */
462 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
463 {
464 /* Search for the end of the scheme. */
465 size_t iPos5 = cbLen;
466 rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5);
467 if (iPos5 > iPos4) /* Length check */
468 return rtUriPercentDecodeN(&pszUri[iPos4], iPos5 - iPos4);
469 }
470
471 return NULL;
472}
473
474RTR3DECL(char *) RTUriQuery(const char *pszUri)
475{
476 AssertPtrReturn(pszUri, NULL);
477
478 size_t iPos1;
479 size_t cbLen = strlen(pszUri);
480 /* Find the end of the scheme. */
481 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
482 return NULL; /* no URI */
483 else
484 ++iPos1; /* Skip ':' */
485
486 size_t iPos2;
487 size_t iPos3 = iPos1; /* Skip if no authority is found */
488 /* Find the start of the authority. */
489 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
490 {
491 /* Find the end of the authority. If not found, then there is no path
492 * component, cause the authority is the rest of the string. */
493 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
494 return NULL; /* no path! */
495 }
496
497 size_t iPos4;
498 size_t iPos5 = iPos3; /* Skip if no path is found */
499 /* Find the start of the path */
500 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
501 {
502 /* Find the end of the path. If not found, then there is no query
503 * component, cause the path is the rest of the string. */
504 if (!rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5))
505 return NULL; /* no query! */
506 }
507
508 size_t iPos6;
509 /* Find the start of the query */
510 if (rtUriCheckQueryStart(pszUri, iPos5, cbLen - iPos5, &iPos6))
511 {
512 /* Search for the end of the query. */
513 size_t iPos7 = cbLen;
514 rtUriFindQueryEnd(pszUri, iPos6, cbLen - iPos6, &iPos7);
515 if (iPos7 > iPos6) /* Length check */
516 return rtUriPercentDecodeN(&pszUri[iPos6], iPos7 - iPos6);
517 }
518
519 return NULL;
520}
521
522RTR3DECL(char *) RTUriFragment(const char *pszUri)
523{
524 AssertPtrReturn(pszUri, NULL);
525
526 size_t iPos1;
527 size_t cbLen = strlen(pszUri);
528 /* Find the end of the scheme. */
529 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
530 return NULL; /* no URI */
531 else
532 ++iPos1; /* Skip ':' */
533
534 size_t iPos2;
535 size_t iPos3 = iPos1; /* Skip if no authority is found */
536 /* Find the start of the authority. */
537 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
538 {
539 /* Find the end of the authority. If not found, then there is no path
540 * component, cause the authority is the rest of the string. */
541 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
542 return NULL; /* no path! */
543 }
544
545 size_t iPos4;
546 size_t iPos5 = iPos3; /* Skip if no path is found */
547 /* Find the start of the path */
548 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
549 {
550 /* Find the end of the path. If not found, then there is no query
551 * component, cause the path is the rest of the string. */
552 if (!rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5))
553 return NULL; /* no query! */
554 }
555
556 size_t iPos6;
557 size_t iPos7 = iPos5; /* Skip if no query is found */
558 /* Find the start of the query */
559 if (rtUriCheckQueryStart(pszUri, iPos5, cbLen - iPos5, &iPos6))
560 {
561 /* Find the end of the query If not found, then there is no fragment
562 * component, cause the query is the rest of the string. */
563 if (!rtUriFindQueryEnd(pszUri, iPos6, cbLen - iPos6, &iPos7))
564 return NULL; /* no query! */
565 }
566
567
568 size_t iPos8;
569 /* Find the start of the fragment */
570 if (rtUriCheckFragmentStart(pszUri, iPos7, cbLen - iPos7, &iPos8))
571 {
572 /* There could be nothing behind a fragment. So use the rest of the
573 * string. */
574 if (cbLen > iPos8) /* Length check */
575 return rtUriPercentDecodeN(&pszUri[iPos8], cbLen - iPos8);
576 }
577 return NULL;
578}
579
580/*******************************************************************************
581* File Uri methods *
582*******************************************************************************/
583
584RTR3DECL(char *) RTUriFileCreate(const char *pszPath)
585{
586 if (!pszPath)
587 return NULL;
588
589 char *pszResult = 0;
590 char *pszPath1 = 0;
591
592 do
593 {
594 /* Create the percent encoded strings and calculate the necessary uri
595 * length. */
596 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
597 if (!pszPath1)
598 break;
599 size_t cbSize = 7 /* file:// */ + strlen(pszPath1) + 1; /* plus zero byte */
600 if (pszPath1[0] != '/')
601 ++cbSize;
602 char *pszTmp = pszResult = (char*)RTMemAllocZ(cbSize);
603 if (!pszResult)
604 break;
605 /* Compose the target uri string. */
606 RTStrCatP(&pszTmp, &cbSize, "file://");
607 if (pszPath1[0] != '/')
608 RTStrCatP(&pszTmp, &cbSize, "/");
609 RTStrCatP(&pszTmp, &cbSize, pszPath1);
610 }while (0);
611
612 /* Cleanup */
613 if (pszPath1)
614 RTStrFree(pszPath1);
615
616 return pszResult;
617}
618
619RTR3DECL(char *) RTUriFilePath(const char *pszUri, uint32_t uFormat)
620{
621 return RTUriFileNPath(pszUri, uFormat, RTSTR_MAX);
622}
623
624RTR3DECL(char *) RTUriFileNPath(const char *pszUri, uint32_t uFormat, size_t cchMax)
625{
626 AssertPtrReturn(pszUri, NULL);
627
628 size_t iPos1;
629 size_t cbLen = RT_MIN(strlen(pszUri), cchMax);
630 /* Find the end of the scheme. */
631 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
632 return NULL; /* no URI */
633 else
634 ++iPos1; /* Skip ':' */
635
636 /* Check that this is a file Uri */
637 if (RTStrNICmp(pszUri, "file:", iPos1) != 0)
638 return NULL;
639
640 size_t iPos2;
641 size_t iPos3 = iPos1; /* Skip if no authority is found */
642 /* Find the start of the authority. */
643 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
644 {
645 /* Find the end of the authority. If not found, then there is no path
646 * component, cause the authority is the rest of the string. */
647 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
648 return NULL; /* no path! */
649 }
650
651 size_t iPos4;
652 /* Find the start of the path */
653 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
654 {
655 uint32_t uFIntern = uFormat;
656 /* Auto is based on the current OS. */
657 if (uFormat == URI_FILE_FORMAT_AUTO)
658#ifdef RT_OS_WINDOWS
659 uFIntern = URI_FILE_FORMAT_WIN;
660#else /* RT_OS_WINDOWS */
661 uFIntern = URI_FILE_FORMAT_UNIX;
662#endif /* !RT_OS_WINDOWS */
663
664 if ( uFIntern != URI_FILE_FORMAT_UNIX
665 && pszUri[iPos4] == '/')
666 ++iPos4;
667 /* Search for the end of the scheme. */
668 size_t iPos5 = cbLen;
669 rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5);
670 if (iPos5 > iPos4) /* Length check */
671 {
672 char *pszPath = rtUriPercentDecodeN(&pszUri[iPos4], iPos5 - iPos4);
673 if (uFIntern == URI_FILE_FORMAT_UNIX)
674 return RTPathChangeToUnixSlashes(pszPath, true);
675 else if (uFIntern == URI_FILE_FORMAT_WIN)
676 return RTPathChangeToDosSlashes(pszPath, true);
677 else
678 {
679 RTStrFree(pszPath);
680 AssertMsgFailed(("Unknown uri file format %u", uFIntern));
681 return NULL;
682 }
683 }
684 }
685
686 return NULL;
687}
688
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette