VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 82968

Last change on this file since 82968 was 82968, checked in by vboxsync, 5 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 39.7 KB
Line 
1/* $Id: uri.cpp 82968 2020-02-04 10:35:17Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/uri.h>
32
33#include <iprt/assert.h>
34#include <iprt/ctype.h>
35#include <iprt/err.h>
36#include <iprt/path.h>
37#include <iprt/string.h>
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
43/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */
44#define RTURIPARSED_MAGIC UINT32_C(0x439e0745)
45
46
/* General URI format:

         foo://example.com:8042/over/there?name=ferret#nose
         \_/   \______________/\_________/ \_________/ \__/
          |           |            |            |        |
       scheme     authority       path        query   fragment
          |   _____________________|__
         / \ /                        \
         urn:example:animal:ferret:nose
*/
57
58
/**
 * Checks whether a character has to be % escaped when encoding.
 *
 * The excluded characters are:
 *      - control = 00-1F
 *      - space   = ' '
 *      - delims  = '<' , '>' , '#' , '%' , '"'
 *      - unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * @note    The argument is expanded several times, so it must not have any
 *          side effects.
 */
#define URI_EXCLUDED(a) \
    (   ((a) >= 0x00 && (a) <= ' ') \
     || ((a) >= '['  && (a) <= '^') \
     || ((a) >= '{'  && (a) <= '}') \
     || (a) == '<' || (a) == '>' || (a) == '#' \
     || (a) == '%' || (a) == '"' || (a) == '`' )
72
73static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
74{
75 if (!pszString)
76 return NULL;
77
78 int rc = VINF_SUCCESS;
79
80 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
81 /* The new string can be max 3 times in size of the original string. */
82 char *pszNew = RTStrAlloc(cbLen * 3 + 1);
83 if (!pszNew)
84 return NULL;
85
86 char *pszRes = NULL;
87 size_t iIn = 0;
88 size_t iOut = 0;
89 while (iIn < cbLen)
90 {
91 if (URI_EXCLUDED(pszString[iIn]))
92 {
93 char szNum[3] = { 0, 0, 0 };
94 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
95 pszNew[iOut++] = '%';
96 pszNew[iOut++] = szNum[0];
97 pszNew[iOut++] = szNum[1];
98 }
99 else
100 pszNew[iOut++] = pszString[iIn++];
101 }
102 if (RT_SUCCESS(rc))
103 {
104 pszNew[iOut] = '\0';
105 if (iOut != iIn)
106 {
107 /* If the source and target strings have different size, recreate
108 * the target string with the correct size. */
109 pszRes = RTStrDupN(pszNew, iOut);
110 RTStrFree(pszNew);
111 }
112 else
113 pszRes = pszNew;
114 }
115 else
116 RTStrFree(pszNew);
117
118 return pszRes;
119}
120
121
122/**
123 * Calculates the encoded string length.
124 *
125 * @returns Number of chars (excluding the terminator).
126 * @param pszString The string to encode.
127 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
128 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
129 */
130static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
131{
132 size_t cchEncoded = 0;
133 if (pszString)
134 {
135 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
136 while (cchSrcLeft-- > 0)
137 {
138 char const ch = *pszString++;
139 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
140 cchEncoded += 1;
141 else
142 cchEncoded += 3;
143 }
144 }
145 return cchEncoded;
146}
147
148
149/**
150 * Encodes an URI into a caller allocated buffer.
151 *
152 * @returns IPRT status code.
153 * @param pszString The string to encode.
154 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
155 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
156 * @param pszDst The destination buffer.
157 * @param cbDst The size of the destination buffer.
158 */
159static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
160{
161 AssertReturn(pszString, VERR_INVALID_POINTER);
162 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
163
164 /*
165 * We do buffer size checking up front and every time we encode a special
166 * character. That's faster than checking for each char.
167 */
168 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
169 AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
170 cbDst -= cchSrcLeft;
171
172 while (cchSrcLeft-- > 0)
173 {
174 char const ch = *pszString++;
175 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
176 *pszDst++ = ch;
177 else
178 {
179 AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
180 cbDst -= 2;
181
182 *pszDst++ = '%';
183 ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
184 Assert(cchTmp == 2); NOREF(cchTmp);
185 pszDst += 2;
186 }
187 }
188
189 *pszDst = '\0';
190 return VINF_SUCCESS;
191}
192
193
194static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
195{
196 AssertPtrReturn(pszString, NULL);
197 AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);
198
199 /*
200 * The new string can only get smaller, so use the input length as a
201 * staring buffer size.
202 */
203 char *pszDecoded = RTStrAlloc(cchString + 1);
204 if (pszDecoded)
205 {
206 /*
207 * Knowing that the pszString itself is valid UTF-8, we only have to
208 * validate the escape sequences.
209 */
210 size_t cchLeft = cchString;
211 char const *pchSrc = pszString;
212 char *pchDst = pszDecoded;
213 while (cchLeft > 0)
214 {
215 const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft);
216 if (pchPct)
217 {
218 size_t cchBefore = pchPct - pchSrc;
219 if (cchBefore)
220 {
221 memcpy(pchDst, pchSrc, cchBefore);
222 pchDst += cchBefore;
223 pchSrc += cchBefore;
224 cchLeft -= cchBefore;
225 }
226
227 char chHigh, chLow;
228 if ( cchLeft >= 3
229 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
230 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
231 {
232 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
233 b <<= 4;
234 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
235 *pchDst++ = (char)b;
236 pchSrc += 3;
237 cchLeft -= 3;
238 }
239 else
240 {
241 AssertFailed();
242 *pchDst++ = *pchSrc++;
243 cchLeft--;
244 }
245 }
246 else
247 {
248 memcpy(pchDst, pchSrc, cchLeft);
249 pchDst += cchLeft;
250 pchSrc += cchLeft;
251 cchLeft = 0;
252 break;
253 }
254 }
255
256 *pchDst = '\0';
257
258 /*
259 * If we've got lof space room in the result string, reallocate it.
260 */
261 size_t cchDecoded = pchDst - pszDecoded;
262 Assert(cchDecoded <= cchString);
263 if (cchString - cchDecoded > 64)
264 RTStrRealloc(&pszDecoded, cchDecoded + 1);
265 }
266 return pszDecoded;
267}
268
269
270/**
271 * Calculates the decoded string length.
272 *
273 * @returns Number of chars (excluding the terminator).
274 * @param pszString The string to decode.
275 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
276 */
277static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
278{
279 size_t cchDecoded;
280 if (pszString)
281 {
282 size_t cchSrcLeft = cchDecoded = RTStrNLen(pszString, cchMax);
283 while (cchSrcLeft-- > 0)
284 {
285 char const ch = *pszString++;
286 if (ch != '%')
287 { /* typical */}
288 else if ( cchSrcLeft >= 2
289 && RT_C_IS_XDIGIT(pszString[0])
290 && RT_C_IS_XDIGIT(pszString[1]))
291 {
292 cchDecoded -= 2;
293 pszString += 2;
294 cchSrcLeft -= 2;
295 }
296 }
297 }
298 else
299 cchDecoded = 0;
300 return cchDecoded;
301}
302
303
304/**
305 * Decodes a string into a buffer.
306 *
307 * @returns IPRT status code.
308 * @param pchSrc The source string.
309 * @param cchSrc The max number of bytes to decode in the source string.
310 * @param pszDst The destination buffer.
311 * @param cbDst The size of the buffer (including terminator).
312 */
313static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst)
314{
315 AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
316 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
317
318 /*
319 * Knowing that the pszString itself is valid UTF-8, we only have to
320 * validate the escape sequences.
321 */
322 cchSrc = RTStrNLen(pchSrc, cchSrc);
323 while (cchSrc > 0)
324 {
325 const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc);
326 if (pchPct)
327 {
328 size_t cchBefore = pchPct - pchSrc;
329 AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
330 if (cchBefore)
331 {
332 memcpy(pszDst, pchSrc, cchBefore);
333 pszDst += cchBefore;
334 cbDst -= cchBefore;
335 pchSrc += cchBefore;
336 cchSrc -= cchBefore;
337 }
338
339 char chHigh, chLow;
340 if ( cchSrc >= 3
341 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
342 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
343 {
344 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
345 b <<= 4;
346 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
347 *pszDst++ = (char)b;
348 pchSrc += 3;
349 cchSrc -= 3;
350 }
351 else
352 {
353 AssertFailed();
354 *pszDst++ = *pchSrc++;
355 cchSrc--;
356 }
357 cbDst -= 1;
358 }
359 else
360 {
361 AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
362 memcpy(pszDst, pchSrc, cchSrc);
363 pszDst += cchSrc;
364 cbDst -= cchSrc;
365 pchSrc += cchSrc;
366 cchSrc = 0;
367 break;
368 }
369 }
370
371 AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
372 *pszDst = '\0';
373 return VINF_SUCCESS;
374}
375
376
377
378static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
379{
380 /*
381 * Validate the input and clear the output.
382 */
383 AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
384 RT_ZERO(*pParsed);
385 pParsed->uAuthorityPort = UINT32_MAX;
386
387 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
388
389 size_t const cchUri = strlen(pszUri);
390 if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
391 else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
392
393 /*
394 * Validating escaped text sequences is much simpler if we know that
395 * that the base URI string is valid. Also, we don't necessarily trust
396 * the developer calling us to remember to do this.
397 */
398 int rc = RTStrValidateEncoding(pszUri);
399 AssertRCReturn(rc, rc);
400
401 /*
402 * RFC-3986, section 3.1:
403 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
404 *
405 * The scheme ends with a ':', which we also skip here.
406 */
407 size_t off = 0;
408 char ch = pszUri[off++];
409 if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
410 else return VERR_URI_INVALID_SCHEME;
411 for (;;)
412 {
413 ch = pszUri[off];
414 if (ch == ':')
415 break;
416 if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
417 else return VERR_URI_INVALID_SCHEME;
418 off++;
419 }
420 pParsed->cchScheme = off;
421
422 /* Require the scheme length to be at least two chars so we won't confuse
423 it with a path starting with a DOS drive letter specification. */
424 if (RT_LIKELY(off >= 2)) { /* likely */ }
425 else return VERR_URI_INVALID_SCHEME;
426
427 off++; /* (skip colon) */
428
429 /*
430 * Find the end of the path, we'll need this several times.
431 * Also, while we're potentially scanning the whole thing, check for '%'.
432 */
433 size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
434 size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
435
436 if (memchr(pszUri, '%', cchUri) != NULL)
437 pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
438
439 /*
440 * RFC-3986, section 3.2:
441 * The authority component is preceeded by a double slash ("//")...
442 */
443 if ( pszUri[off] == '/'
444 && pszUri[off + 1] == '/')
445 {
446 off += 2;
447 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
448 pParsed->fFlags |= RTURIPARSED_F_HAS_AUTHORITY;
449
450 /*
451 * RFC-3986, section 3.2:
452 * ...and is terminated by the next slash ("/"), question mark ("?"),
453 * or number sign ("#") character, or by the end of the URI.
454 */
455 const char *pszAuthority = &pszUri[off];
456 size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
457 cchAuthority = RT_MIN(cchAuthority, offHash - off);
458 cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
459 pParsed->cchAuthority = cchAuthority;
460
461 /* The Authority can be empty, like for: file:///usr/bin/grep */
462 if (cchAuthority > 0)
463 {
464 pParsed->cchAuthorityHost = cchAuthority;
465
466 /*
467 * If there is a userinfo part, it is ended by a '@'.
468 */
469 const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
470 if (pszAt)
471 {
472 size_t cchTmp = pszAt - pszAuthority;
473 pParsed->offAuthorityHost += cchTmp + 1;
474 pParsed->cchAuthorityHost -= cchTmp + 1;
475
476 /* If there is a password part, it's separated from the username with a colon. */
477 const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
478 if (pszColon)
479 {
480 pParsed->cchAuthorityUsername = pszColon - pszAuthority;
481 pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
482 pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
483 }
484 else
485 {
486 pParsed->cchAuthorityUsername = cchTmp;
487 pParsed->offAuthorityPassword = off + cchTmp;
488 }
489 }
490
491 /*
492 * If there is a port part, its after the last colon in the host part.
493 */
494 const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
495 if (pszColon)
496 {
497 size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
498 pParsed->cchAuthorityHost -= cchTmp + 1;
499 pParsed->fFlags |= RTURIPARSED_F_HAS_PORT;
500 if (cchTmp > 0)
501 {
502 pParsed->uAuthorityPort = 0;
503 while (cchTmp-- > 0)
504 {
505 ch = *++pszColon;
506 if ( RT_C_IS_DIGIT(ch)
507 && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
508 {
509 pParsed->uAuthorityPort *= 10;
510 pParsed->uAuthorityPort += ch - '0';
511 }
512 else
513 return VERR_URI_INVALID_PORT_NUMBER;
514 }
515 }
516 }
517 }
518
519 /* Skip past the authority. */
520 off += cchAuthority;
521 }
522 else
523 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
524
525 /*
526 * RFC-3986, section 3.3: Path
527 * The path is terminated by the first question mark ("?")
528 * or number sign ("#") character, or by the end of the URI.
529 */
530 pParsed->offPath = off;
531 pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
532 off += pParsed->cchPath;
533
534 /*
535 * RFC-3986, section 3.4: Query
536 * The query component is indicated by the first question mark ("?")
537 * character and terminated by a number sign ("#") character or by the
538 * end of the URI.
539 */
540 if ( off == offQuestionMark
541 && off < cchUri)
542 {
543 Assert(pszUri[offQuestionMark] == '?');
544 pParsed->offQuery = ++off;
545 pParsed->cchQuery = offHash - off;
546 off = offHash;
547 }
548 else
549 {
550 Assert(!pszUri[offQuestionMark]);
551 pParsed->offQuery = off;
552 }
553
554 /*
555 * RFC-3986, section 3.5: Fragment
556 * A fragment identifier component is indicated by the presence of a
557 * number sign ("#") character and terminated by the end of the URI.
558 */
559 if ( off == offHash
560 && off < cchUri)
561 {
562 pParsed->offFragment = ++off;
563 pParsed->cchFragment = cchUri - off;
564 }
565 else
566 {
567 Assert(!pszUri[offHash]);
568 pParsed->offFragment = off;
569 }
570
571 /*
572 * If there are any escape sequences, validate them.
573 *
574 * This is reasonably simple as we already know that the string is valid UTF-8
575 * before they get decoded. Thus we only have to validate the escaped sequences.
576 */
577 if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
578 {
579 const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri);
580 AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
581 do
582 {
583 char szUtf8Seq[8];
584 unsigned cchUtf8Seq = 0;
585 unsigned cchNeeded = 0;
586 size_t cchLeft = &pszUri[cchUri] - pchSrc;
587 do
588 {
589 if (cchLeft >= 3)
590 {
591 char chHigh = pchSrc[1];
592 char chLow = pchSrc[2];
593 if ( RT_C_IS_XDIGIT(chHigh)
594 && RT_C_IS_XDIGIT(chLow))
595 {
596 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
597 b <<= 4;
598 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
599
600 if (!(b & 0x80))
601 {
602 /* We don't want the string to be terminated prematurely. */
603 if (RT_LIKELY(b != 0)) { /* likely */ }
604 else return VERR_URI_ESCAPED_ZERO;
605
606 /* Check that we're not expecting more UTF-8 bytes. */
607 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
608 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
609 }
610 /* Are we waiting UTF-8 bytes? */
611 else if (cchNeeded > 0)
612 {
613 if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
614 else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
615
616 szUtf8Seq[cchUtf8Seq++] = (char)b;
617 if (--cchNeeded == 0)
618 {
619 szUtf8Seq[cchUtf8Seq] = '\0';
620 rc = RTStrValidateEncoding(szUtf8Seq);
621 if (RT_FAILURE(rc))
622 return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
623 cchUtf8Seq = 0;
624 }
625 }
626 /* Start a new UTF-8 sequence. */
627 else
628 {
629 if ((b & 0xf8) == 0xf0)
630 cchNeeded = 3;
631 else if ((b & 0xf0) == 0xe0)
632 cchNeeded = 2;
633 else if ((b & 0xe0) == 0xc0)
634 cchNeeded = 1;
635 else
636 return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
637 szUtf8Seq[0] = (char)b;
638 cchUtf8Seq = 1;
639 }
640 pchSrc += 3;
641 cchLeft -= 3;
642 }
643 else
644 return VERR_URI_INVALID_ESCAPE_SEQ;
645 }
646 else
647 return VERR_URI_INVALID_ESCAPE_SEQ;
648 } while (cchLeft > 0 && pchSrc[0] == '%');
649
650 /* Check that we're not expecting more UTF-8 bytes. */
651 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
652 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
653
654 /* next */
655 pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
656 } while (pchSrc);
657 }
658
659 pParsed->u32Magic = RTURIPARSED_MAGIC;
660 return VINF_SUCCESS;
661}
662
663
664RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
665{
666 return rtUriParse(pszUri, pParsed);
667}
668
669
670RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
671{
672 AssertPtrReturn(pszUri, NULL);
673 AssertPtrReturn(pParsed, NULL);
674 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
675 return RTStrDupN(pszUri, pParsed->cchScheme);
676}
677
678
679RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
680{
681 AssertPtrReturn(pszUri, NULL);
682 AssertPtrReturn(pParsed, NULL);
683 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
684 if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAS_AUTHORITY))
685 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
686 return NULL;
687}
688
689
690RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
691{
692 AssertPtrReturn(pszUri, NULL);
693 AssertPtrReturn(pParsed, NULL);
694 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
695 if (pParsed->cchAuthorityUsername)
696 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
697 return NULL;
698}
699
700
701RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
702{
703 AssertPtrReturn(pszUri, NULL);
704 AssertPtrReturn(pParsed, NULL);
705 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
706 if (pParsed->cchAuthorityPassword)
707 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
708 return NULL;
709}
710
711
712RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
713{
714 AssertPtrReturn(pszUri, NULL);
715 AssertPtrReturn(pParsed, NULL);
716 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
717 if (pParsed->cchAuthorityHost)
718 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
719 return NULL;
720}
721
722
723RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
724{
725 AssertPtrReturn(pszUri, UINT32_MAX);
726 AssertPtrReturn(pParsed, UINT32_MAX);
727 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
728 return pParsed->uAuthorityPort;
729}
730
731
732RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
733{
734 AssertPtrReturn(pszUri, NULL);
735 AssertPtrReturn(pParsed, NULL);
736 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
737 if (pParsed->cchPath)
738 return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
739 return NULL;
740}
741
742
743RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
744{
745 AssertPtrReturn(pszUri, NULL);
746 AssertPtrReturn(pParsed, NULL);
747 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
748 if (pParsed->cchQuery)
749 return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
750 return NULL;
751}
752
753
754RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
755{
756 AssertPtrReturn(pszUri, NULL);
757 AssertPtrReturn(pParsed, NULL);
758 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
759 if (pParsed->cchFragment)
760 return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
761 return NULL;
762}
763
764
765RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
766 const char *pszFragment)
767{
768 if (!pszScheme) /* Scheme is minimum requirement */
769 return NULL;
770
771 char *pszResult = 0;
772 char *pszAuthority1 = 0;
773 char *pszPath1 = 0;
774 char *pszQuery1 = 0;
775 char *pszFragment1 = 0;
776
777 do
778 {
779 /* Create the percent encoded strings and calculate the necessary uri
780 * length. */
781 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
782 if (pszAuthority)
783 {
784 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
785 if (!pszAuthority1)
786 break;
787 cbSize += strlen(pszAuthority1) + 2;
788 }
789 if (pszPath)
790 {
791 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
792 if (!pszPath1)
793 break;
794 cbSize += strlen(pszPath1);
795 }
796 if (pszQuery)
797 {
798 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
799 if (!pszQuery1)
800 break;
801 cbSize += strlen(pszQuery1) + 1;
802 }
803 if (pszFragment)
804 {
805 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
806 if (!pszFragment1)
807 break;
808 cbSize += strlen(pszFragment1) + 1;
809 }
810
811 char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
812 if (!pszResult)
813 break;
814 RT_BZERO(pszTmp, cbSize);
815
816 /* Compose the target uri string. */
817 RTStrCatP(&pszTmp, &cbSize, pszScheme);
818 RTStrCatP(&pszTmp, &cbSize, ":");
819 if (pszAuthority1)
820 {
821 RTStrCatP(&pszTmp, &cbSize, "//");
822 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
823 }
824 if (pszPath1)
825 {
826 RTStrCatP(&pszTmp, &cbSize, pszPath1);
827 }
828 if (pszQuery1)
829 {
830 RTStrCatP(&pszTmp, &cbSize, "?");
831 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
832 }
833 if (pszFragment1)
834 {
835 RTStrCatP(&pszTmp, &cbSize, "#");
836 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
837 }
838 } while (0);
839
840 /* Cleanup */
841 if (pszAuthority1)
842 RTStrFree(pszAuthority1);
843 if (pszPath1)
844 RTStrFree(pszPath1);
845 if (pszQuery1)
846 RTStrFree(pszQuery1);
847 if (pszFragment1)
848 RTStrFree(pszFragment1);
849
850 return pszResult;
851}
852
853
854RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
855{
856 AssertPtrReturn(pszUri, false);
857 size_t const cchScheme = strlen(pszScheme);
858 return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
859 && pszUri[cchScheme] == ':';
860}
861
862
863RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
864{
865 /*
866 * Validate and adjust input. (RTPathParse check pszPath out for us)
867 */
868 if (pcchUri)
869 {
870 AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
871 *pcchUri = ~(size_t)0;
872 }
873 AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
874 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
875 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
876 fPathStyle = RTPATH_STYLE;
877
878 /*
879 * Let the RTPath code parse the stuff (no reason to duplicate path parsing
880 * and get it slightly wrong here).
881 */
882 union
883 {
884 RTPATHPARSED ParsedPath;
885 uint8_t abPadding[sizeof(RTPATHPARSED)];
886 } u;
887 int rc = RTPathParse(pszPath, &u.ParsedPath, sizeof(u.ParsedPath), fPathStyle);
888 if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
889 {
890 /* Skip leading slashes. */
891 if (u.ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
892 {
893 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
894 while (pszPath[0] == '/' || pszPath[0] == '\\')
895 pszPath++;
896 else
897 while (pszPath[0] == '/')
898 pszPath++;
899 }
900 const size_t cchPath = strlen(pszPath);
901
902 /*
903 * Calculate the encoded length and figure destination buffering.
904 */
905 static const char s_szPrefix[] = "file:///";
906 size_t const cchPrefix = sizeof(s_szPrefix) - (u.ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
907 size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
908
909 if (pcchUri)
910 *pcchUri = cchEncoded;
911
912 char *pszDst;
913 char *pszFreeMe = NULL;
914 if (!cbUri || *ppszUri == NULL)
915 {
916 cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
917 *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
918 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
919 }
920 else if (cchEncoded < cbUri)
921 pszDst = *ppszUri;
922 else
923 return VERR_BUFFER_OVERFLOW;
924
925 /*
926 * Construct the URI.
927 */
928 memcpy(pszDst, s_szPrefix, cchPrefix);
929 pszDst[cchPrefix] = '\0';
930 rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
931 if (RT_SUCCESS(rc))
932 {
933 Assert(strlen(pszDst) == cbUri - 1);
934 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
935 RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
936 return VINF_SUCCESS;
937 }
938
939 AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
940 if (pszFreeMe)
941 RTStrFree(pszFreeMe);
942 }
943 return rc;
944}
945
946
947RTDECL(char *) RTUriFileCreate(const char *pszPath)
948{
949 char *pszUri = NULL;
950 int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
951 if (RT_SUCCESS(rc))
952 return pszUri;
953 return NULL;
954}
955
956
957RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath)
958{
959 /*
960 * Validate and adjust input.
961 */
962 if (pcchPath)
963 {
964 AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
965 *pcchPath = ~(size_t)0;
966 }
967 AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
968 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
969 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
970 fPathStyle = RTPATH_STYLE;
971 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
972
973 /*
974 * Check that this is a file URI.
975 */
976 if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
977 { /* likely */ }
978 else
979 return VERR_URI_NOT_FILE_SCHEME;
980
981 /*
982 * We may have a number of variations here, mostly thanks to
983 * various windows software. First the canonical variations:
984 * - file:///C:/Windows/System32/kernel32.dll
985 * - file:///C|/Windows/System32/kernel32.dll
986 * - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
987 * - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
988 * - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
989 * - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever)
990 *
991 * Legacy variant without any slashes after the schema:
992 * - file:C:/Windows/System32/kernel32.dll
993 * - file:C|/Windows/System32%5Ckernel32.dll
994 * - file:~/.bashrc
995 * \--path-/
996 *
997 * Legacy variant with exactly one slashes after the schema:
998 * - file:/C:/Windows/System32%5Ckernel32.dll
999 * - file:/C|/Windows/System32/kernel32.dll
1000 * - file:/usr/bin/env
1001 * \---path---/
1002 *
1003 * Legacy variant with two slashes after the schema and an unescaped DOS path:
1004 * - file://C:/Windows/System32\kernel32.dll (**)
1005 * - file://C|/Windows/System32\kernel32.dll
1006 * \---path---------------------/
1007 * -- authority, with ':' as non-working port separator
1008 *
1009 * Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
1010 * - file:////C:/Windows\System32\user32.dll
1011 *
1012 * Legacy variant with four or more slashes after the schema and an unescaped UNC path:
1013 * - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
1014 * - file://///cifsserver.dev/systemshare/System32\kernel32.dll
1015 * \---path--------------------------------------------/
1016 *
1017 * The two unescaped variants shouldn't be handed to rtUriParse, which
1018 * is good as we cannot actually handle the one marked by (**). So, handle
1019 * those two special when parsing.
1020 */
1021 RTURIPARSED Parsed;
1022 int rc;
1023 size_t cSlashes = 0;
1024 while (pszUri[5 + cSlashes] == '/')
1025 cSlashes++;
1026 if ( (cSlashes == 2 || cSlashes == 4)
1027 && RT_C_IS_ALPHA(pszUri[5 + cSlashes])
1028 && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|'))
1029 {
1030 RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
1031 Parsed.offPath = 5 + cSlashes;
1032 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1033 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1034 }
1035 else if (cSlashes >= 4)
1036 {
1037 RT_ZERO(Parsed);
1038 Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
1039 Parsed.offPath = 5 + cSlashes - 2;
1040 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1041 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1042 }
1043 else
1044 rc = rtUriParse(pszUri, &Parsed);
1045 if (RT_SUCCESS(rc))
1046 {
1047 /*
1048 * Ignore localhost as hostname (it's implicit).
1049 */
1050 static char const s_szLocalhost[] = "localhost";
1051 if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
1052 && RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
1053 {
1054 Parsed.cchAuthorityHost = 0;
1055 Parsed.cchAuthority = 0;
1056 }
1057
1058 /*
1059 * Ignore leading path slash/separator if we detect a DOS drive letter
1060 * and we don't have a host name.
1061 */
1062 if ( Parsed.cchPath >= 3
1063 && Parsed.cchAuthorityHost == 0
1064 && pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */
1065 && ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */
1066 || pszUri[Parsed.offPath + 2] == '|') /* Colon alternative. */
1067 && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
1068 {
1069 Parsed.offPath++;
1070 Parsed.cchPath--;
1071 }
1072
1073 /*
1074 * Calculate the size of the encoded result.
1075 *
1076 * Since we're happily returning "C:/Windows/System32/kernel.dll"
1077 * style paths when the caller requested UNIX style paths, we will
1078 * return straight UNC paths too ("//cifsserver/share/dir/file").
1079 */
1080 size_t cchDecodedHost = 0;
1081 size_t cbResult;
1082 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1083 {
1084 cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1085 cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
1086 }
1087 else
1088 {
1089 cchDecodedHost = 0;
1090 cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
1091 }
1092 if (pcchPath)
1093 *pcchPath = cbResult - 1;
1094 if (cbResult > 1)
1095 {
1096 /*
1097 * Prepare the necessary buffer space for the result.
1098 */
1099 char *pszDst;
1100 char *pszFreeMe = NULL;
1101 if (!cbPath || *ppszPath == NULL)
1102 {
1103 cbPath = RT_MAX(cbPath, cbResult);
1104 *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
1105 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
1106 }
1107 else if (cbResult <= cbPath)
1108 pszDst = *ppszPath;
1109 else
1110 return VERR_BUFFER_OVERFLOW;
1111
1112 /*
1113 * Compose the result.
1114 */
1115 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1116 {
1117 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
1118 pszDst, cchDecodedHost + 1);
1119 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
1120 if (RT_SUCCESS(rc))
1121 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
1122 &pszDst[cchDecodedHost], cbResult - cchDecodedHost);
1123 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
1124 }
1125 else
1126 {
1127 memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1128 memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
1129 pszDst[cbResult - 1] = '\0';
1130 }
1131 if (RT_SUCCESS(rc))
1132 {
1133 /*
1134 * Convert colon DOS driver letter colon alternative.
1135 * We do this regardless of the desired path style.
1136 */
1137 if ( RT_C_IS_ALPHA(pszDst[0])
1138 && pszDst[1] == '|')
1139 pszDst[1] = ':';
1140
1141 /*
1142 * Fix slashes.
1143 */
1144 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
1145 RTPathChangeToDosSlashes(pszDst, true);
1146 else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
1147 RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
1148 else
1149 AssertFailed();
1150 return rc;
1151 }
1152
1153 /* bail out */
1154 RTStrFree(pszFreeMe);
1155 }
1156 else
1157 rc = VERR_PATH_ZERO_LENGTH;
1158 }
1159 return rc;
1160}
1161
1162
1163RTDECL(char *) RTUriFilePath(const char *pszUri)
1164{
1165 char *pszPath = NULL;
1166 int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/);
1167 if (RT_SUCCESS(rc))
1168 return pszPath;
1169 return NULL;
1170}
1171
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette