uri.cpp@ 62461

Last change on this file since 62461 was 62461, checked in by vboxsync, 8 years ago
IPRT: scm
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 39.5 KB

Line
1	/* $Id: uri.cpp 62461 2016-07-22 16:21:26Z vboxsync $ */
2	/** @file
3	* IPRT - Uniform Resource Identifier handling.
4	*/
5
6	/*
7	* Copyright (C) 2011-2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27
28	/*********************************************************************************************************************************
29	* Header Files *
30	*********************************************************************************************************************************/
31	#include <iprt/uri.h>
32
33	#include <iprt/assert.h>
34	#include <iprt/ctype.h>
35	#include <iprt/path.h>
36	#include <iprt/string.h>
37
38
39	/*********************************************************************************************************************************
40	* Defined Constants And Macros *
41	*********************************************************************************************************************************/
/** Magic value stored in RTURIPARSED::u32Magic once rtUriParse has completed
 * successfully; the RTUriParsedXxx getters check it before trusting the
 * structure. */
#define RTURIPARSED_MAGIC   UINT32_C(0x439E0745)
44
45
/* General URI format:

    foo://example.com:8042/over/there?name=ferret#nose
    \_/   \______________/\_________/ \_________/ \__/
     |           |            |            |        |
  scheme     authority       path        query   fragment
     |   _____________________|__
    / \ /                        \
    urn:example:animal:ferret:nose
*/
56
57
/**
 * Tests whether a character has to be '%' escaped.
 *
 * The following characters are matched:
 *      control = 00-1F
 *      space   = ' '
 *      delims  = '<' , '>' , '#' , '%' , '"'
 *      unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * The ranges 0x5B-0x5E and 0x7B-0x7D cover "[\]^" and "{|}" respectively.
 *
 * @note    The argument is evaluated several times, so it must not have side
 *          effects.
 */
#define URI_EXCLUDED(a) \
  (   ((a) >= 0x0  && (a) <= 0x20) \
   || ((a) >= 0x5B && (a) <= 0x5E) \
   || ((a) >= 0x7B && (a) <= 0x7D) \
   || (a) == '<' || (a) == '>' || (a) == '#' \
   || (a) == '%' || (a) == '"' || (a) == '`' )
71
72	static char rtUriPercentEncodeN(const char pszString, size_t cchMax)
73	{
74	if (!pszString)
75	return NULL;
76
77	int rc = VINF_SUCCESS;
78
79	size_t cbLen = RT_MIN(strlen(pszString), cchMax);
80	/* The new string can be max 3 times in size of the original string. */
81	char pszNew = RTStrAlloc(cbLen 3 + 1);
82	if (!pszNew)
83	return NULL;
84
85	char *pszRes = NULL;
86	size_t iIn = 0;
87	size_t iOut = 0;
88	while (iIn < cbLen)
89	{
90	if (URI_EXCLUDED(pszString[iIn]))
91	{
92	char szNum[3] = { 0, 0, 0 };
93	RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL \| RTSTR_F_ZEROPAD);
94	pszNew[iOut++] = '%';
95	pszNew[iOut++] = szNum[0];
96	pszNew[iOut++] = szNum[1];
97	}
98	else
99	pszNew[iOut++] = pszString[iIn++];
100	}
101	if (RT_SUCCESS(rc))
102	{
103	pszNew[iOut] = '\0';
104	if (iOut != iIn)
105	{
106	/* If the source and target strings have different size, recreate
107	* the target string with the correct size. */
108	pszRes = RTStrDupN(pszNew, iOut);
109	RTStrFree(pszNew);
110	}
111	else
112	pszRes = pszNew;
113	}
114	else
115	RTStrFree(pszNew);
116
117	return pszRes;
118	}
119
120
/**
 * Calculates the encoded string length.
 *
 * Characters that are passed through unchanged count as one, characters that
 * get '%' escaped count as three.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is
 *          NULL.
 * @param   pszString       The string to encode.
 * @param   cchMax          The maximum string length (e.g. RTSTR_MAX).
 * @param   fEncodeDosSlash Whether to encode DOS slashes or not.
 */
static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
{
    size_t cchEncoded = 0;
    if (pszString)
    {
        size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
        while (cchSrcLeft-- > 0)
        {
            char const ch = *pszString++;
            /* A backslash is only left alone when DOS slash encoding is off. */
            if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
                cchEncoded += 1;
            else
                cchEncoded += 3;
        }
    }
    return cchEncoded;
}
146
147
148	/**
149	* Encodes an URI into a caller allocated buffer.
150	*
151	* @returns IPRT status code.
152	* @param pszString The string to encode.
153	* @param cchMax The maximum string length (e.g. RTSTR_MAX).
154	* @param fEncodeDosSlash Whether to encode DOS slashes or not.
155	* @param pszDst The destination buffer.
156	* @param cbDst The size of the destination buffer.
157	*/
158	static int rtUriEncodeIntoBuffer(const char pszString, size_t cchMax, bool fEncodeDosSlash, char pszDst, size_t cbDst)
159	{
160	AssertReturn(pszString, VERR_INVALID_POINTER);
161	AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
162
163	/*
164	* We do buffer size checking up front and every time we encode a special
165	* character. That's faster than checking for each char.
166	*/
167	size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
168	AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
169	cbDst -= cchSrcLeft;
170
171	while (cchSrcLeft-- > 0)
172	{
173	char const ch = *pszString++;
174	if (!URI_EXCLUDED(ch) \|\| (ch == '\\' && !fEncodeDosSlash))
175	*pszDst++ = ch;
176	else
177	{
178	AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
179	cbDst -= 2;
180
181	*pszDst++ = '%';
182	ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL \| RTSTR_F_ZEROPAD);
183	Assert(cchTmp == 2); NOREF(cchTmp);
184	pszDst += 2;
185	}
186	}
187
188	*pszDst = '\0';
189	return VINF_SUCCESS;
190	}
191
192
/**
 * Creates a percent decoded copy of a sub-string.
 *
 * Well formed "%XX" sequences are replaced by the byte they encode.  A '%'
 * that isn't followed by two hex digits triggers an assertion and is copied
 * literally.
 *
 * @returns Newly allocated string (RTStrAlloc), NULL if @a pszString is
 *          invalid, contains a terminator within the first @a cchString
 *          bytes, or if we're out of string memory.
 * @param   pszString   The string to decode.
 * @param   cchString   The exact number of bytes to decode.
 */
static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
{
    AssertPtrReturn(pszString, NULL);
    AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);

    /*
     * The new string can only get smaller, so use the input length as a
     * starting buffer size.
     */
    char *pszDecoded = RTStrAlloc(cchString + 1);
    if (pszDecoded)
    {
        /*
         * Knowing that the pszString itself is valid UTF-8, we only have to
         * validate the escape sequences.
         */
        size_t      cchLeft = cchString;
        char const *pchSrc  = pszString;
        char       *pchDst  = pszDecoded;
        while (cchLeft > 0)
        {
            const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft);
            if (pchPct)
            {
                /* Copy whatever precedes the percent sign verbatim. */
                size_t cchBefore = pchPct - pchSrc;
                if (cchBefore)
                {
                    memcpy(pchDst, pchSrc, cchBefore);
                    pchDst  += cchBefore;
                    pchSrc  += cchBefore;
                    cchLeft -= cchBefore;
                }

                char chHigh, chLow;
                if (   cchLeft >= 3
                    && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
                    && RT_C_IS_XDIGIT(chLow  = pchSrc[2]))
                {
                    /* Clearing bit 5 (0x20) folds 'a'..'f' onto 'A'..'F'. */
                    uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
                    b <<= 4;
                    b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
                    *pchDst++ = (char)b;
                    pchSrc  += 3;
                    cchLeft -= 3;
                }
                else
                {
                    /* Malformed escape sequence: keep the '%' as it is. */
                    AssertFailed();
                    *pchDst++ = *pchSrc++;
                    cchLeft--;
                }
            }
            else
            {
                /* No more escape sequences, copy the rest and stop. */
                memcpy(pchDst, pchSrc, cchLeft);
                pchDst += cchLeft;
                pchSrc += cchLeft;
                cchLeft = 0;
                break;
            }
        }

        *pchDst = '\0';

        /*
         * If we've got a lot of spare room in the result string, reallocate it.
         */
        size_t cchDecoded = pchDst - pszDecoded;
        Assert(cchDecoded <= cchString);
        if (cchString - cchDecoded > 64)
            RTStrRealloc(&pszDecoded, cchDecoded + 1);
    }
    return pszDecoded;
}
267
268
/**
 * Works out how long a string will be once its escape sequences are decoded.
 *
 * Only a '%' followed by two hex digits is treated as an escape sequence
 * (three source chars yielding one output char); a '%' without them counts
 * as an ordinary character.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is
 *          NULL.
 * @param   pszString   The string to decode.
 * @param   cchMax      The maximum string length (e.g. RTSTR_MAX).
 */
static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
{
    if (!pszString)
        return 0;

    size_t const cchSrc    = RTStrNLen(pszString, cchMax);
    size_t       cchResult = cchSrc;
    size_t       offSrc    = 0;
    while (offSrc < cchSrc)
    {
        if (   pszString[offSrc] == '%'
            && cchSrc - offSrc >= 3
            && RT_C_IS_XDIGIT(pszString[offSrc + 1])
            && RT_C_IS_XDIGIT(pszString[offSrc + 2]))
        {
            /* "%XX" collapses into a single byte. */
            cchResult -= 2;
            offSrc    += 3;
        }
        else
            offSrc += 1;
    }
    return cchResult;
}
301
302
303	/**
304	* Decodes a string into a buffer.
305	*
306	* @returns IPRT status code.
307	* @param pchSrc The source string.
308	* @param cchSrc The max number of bytes to decode in the source string.
309	* @param pszDst The destination buffer.
310	* @param cbDst The size of the buffer (including terminator).
311	*/
312	static int rtUriDecodeIntoBuffer(const char pchSrc, size_t cchSrc, char pszDst, size_t cbDst)
313	{
314	AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
315	AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
316
317	/*
318	* Knowing that the pszString itself is valid UTF-8, we only have to
319	* validate the escape sequences.
320	*/
321	cchSrc = RTStrNLen(pchSrc, cchSrc);
322	while (cchSrc > 0)
323	{
324	const char pchPct = (const char )memchr(pchSrc, '%', cchSrc);
325	if (pchPct)
326	{
327	size_t cchBefore = pchPct - pchSrc;
328	AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
329	if (cchBefore)
330	{
331	memcpy(pszDst, pchSrc, cchBefore);
332	pszDst += cchBefore;
333	cbDst -= cchBefore;
334	pchSrc += cchBefore;
335	cchSrc -= cchBefore;
336	}
337
338	char chHigh, chLow;
339	if ( cchSrc >= 3
340	&& RT_C_IS_XDIGIT(chHigh = pchSrc[1])
341	&& RT_C_IS_XDIGIT(chLow = pchSrc[2]))
342	{
343	uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
344	b <<= 4;
345	b \|= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
346	*pszDst++ = (char)b;
347	pchSrc += 3;
348	cchSrc -= 3;
349	}
350	else
351	{
352	AssertFailed();
353	pszDst++ = pchSrc++;
354	cchSrc--;
355	}
356	cbDst -= 1;
357	}
358	else
359	{
360	AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
361	memcpy(pszDst, pchSrc, cchSrc);
362	pszDst += cchSrc;
363	cbDst -= cchSrc;
364	pchSrc += cchSrc;
365	cchSrc = 0;
366	break;
367	}
368	}
369
370	AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
371	*pszDst = '\0';
372	return VINF_SUCCESS;
373	}
374
375
376
377	static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
378	{
379	/*
380	* Validate the input and clear the output.
381	*/
382	AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
383	RT_ZERO(*pParsed);
384	pParsed->uAuthorityPort = UINT32_MAX;
385
386	AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
387
388	size_t const cchUri = strlen(pszUri);
389	if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
390	else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
391
392	/*
393	* Validating escaped text sequences is much simpler if we know that
394	* that the base URI string is valid. Also, we don't necessarily trust
395	* the developer calling us to remember to do this.
396	*/
397	int rc = RTStrValidateEncoding(pszUri);
398	AssertRCReturn(rc, rc);
399
400	/*
401	* RFC-3986, section 3.1:
402	* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
403	*
404	* The scheme ends with a ':', which we also skip here.
405	*/
406	size_t off = 0;
407	char ch = pszUri[off++];
408	if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
409	else return VERR_URI_INVALID_SCHEME;
410	for (;;)
411	{
412	ch = pszUri[off];
413	if (ch == ':')
414	break;
415	if (RT_LIKELY(RT_C_IS_ALNUM(ch) \|\| ch == '.' \|\| ch == '-' \|\| ch == '+')) { /* likely */ }
416	else return VERR_URI_INVALID_SCHEME;
417	off++;
418	}
419	pParsed->cchScheme = off;
420
421	/* Require the scheme length to be at least two chars so we won't confuse
422	it with a path starting with a DOS drive letter specification. */
423	if (RT_LIKELY(off >= 2)) { /* likely */ }
424	else return VERR_URI_INVALID_SCHEME;
425
426	off++; /* (skip colon) */
427
428	/*
429	* Find the end of the path, we'll need this several times.
430	* Also, while we're potentially scanning the whole thing, check for '%'.
431	*/
432	size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
433	size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
434
435	if (memchr(pszUri, '%', cchUri) != NULL)
436	pParsed->fFlags \|= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
437
438	/*
439	* RFC-3986, section 3.2:
440	* The authority component is preceeded by a double slash ("//")...
441	*/
442	if ( pszUri[off] == '/'
443	&& pszUri[off + 1] == '/')
444	{
445	off += 2;
446	pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
447	pParsed->fFlags \|= RTURIPARSED_F_HAVE_AUTHORITY;
448
449	/*
450	* RFC-3986, section 3.2:
451	* ...and is terminated by the next slash ("/"), question mark ("?"),
452	* or number sign ("#") character, or by the end of the URI.
453	*/
454	const char *pszAuthority = &pszUri[off];
455	size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
456	cchAuthority = RT_MIN(cchAuthority, offHash - off);
457	cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
458	pParsed->cchAuthority = cchAuthority;
459
460	/* The Authority can be empty, like for: file:///usr/bin/grep */
461	if (cchAuthority > 0)
462	{
463	pParsed->cchAuthorityHost = cchAuthority;
464
465	/*
466	* If there is a userinfo part, it is ended by a '@'.
467	*/
468	const char pszAt = (const char )memchr(pszAuthority, '@', cchAuthority);
469	if (pszAt)
470	{
471	size_t cchTmp = pszAt - pszAuthority;
472	pParsed->offAuthorityHost += cchTmp + 1;
473	pParsed->cchAuthorityHost -= cchTmp + 1;
474
475	/* If there is a password part, it's separated from the username with a colon. */
476	const char pszColon = (const char )memchr(pszAuthority, ':', cchTmp);
477	if (pszColon)
478	{
479	pParsed->cchAuthorityUsername = pszColon - pszAuthority;
480	pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
481	pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
482	}
483	else
484	{
485	pParsed->cchAuthorityUsername = cchTmp;
486	pParsed->offAuthorityPassword = off + cchTmp;
487	}
488	}
489
490	/*
491	* If there is a port part, its after the last colon in the host part.
492	*/
493	const char pszColon = (const char )memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
494	if (pszColon)
495	{
496	size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
497	pParsed->cchAuthorityHost -= cchTmp + 1;
498
499	pParsed->uAuthorityPort = 0;
500	while (cchTmp-- > 0)
501	{
502	ch = *++pszColon;
503	if ( RT_C_IS_DIGIT(ch)
504	&& pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
505	{
506	pParsed->uAuthorityPort *= 10;
507	pParsed->uAuthorityPort += ch - '0';
508	}
509	else
510	return VERR_URI_INVALID_PORT_NUMBER;
511	}
512	}
513	}
514
515	/* Skip past the authority. */
516	off += cchAuthority;
517	}
518	else
519	pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
520
521	/*
522	* RFC-3986, section 3.3: Path
523	* The path is terminated by the first question mark ("?")
524	* or number sign ("#") character, or by the end of the URI.
525	*/
526	pParsed->offPath = off;
527	pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
528	off += pParsed->cchPath;
529
530	/*
531	* RFC-3986, section 3.4: Query
532	* The query component is indicated by the first question mark ("?")
533	* character and terminated by a number sign ("#") character or by the
534	* end of the URI.
535	*/
536	if ( off == offQuestionMark
537	&& off < cchUri)
538	{
539	Assert(pszUri[offQuestionMark] == '?');
540	pParsed->offQuery = ++off;
541	pParsed->cchQuery = offHash - off;
542	off = offHash;
543	}
544	else
545	{
546	Assert(!pszUri[offQuestionMark]);
547	pParsed->offQuery = off;
548	}
549
550	/*
551	* RFC-3986, section 3.5: Fragment
552	* A fragment identifier component is indicated by the presence of a
553	* number sign ("#") character and terminated by the end of the URI.
554	*/
555	if ( off == offHash
556	&& off < cchUri)
557	{
558	pParsed->offFragment = ++off;
559	pParsed->cchFragment = cchUri - off;
560	}
561	else
562	{
563	Assert(!pszUri[offHash]);
564	pParsed->offFragment = off;
565	}
566
567	/*
568	* If there are any escape sequences, validate them.
569	*
570	* This is reasonably simple as we already know that the string is valid UTF-8
571	* before they get decoded. Thus we only have to validate the escaped sequences.
572	*/
573	if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
574	{
575	const char pchSrc = (const char )memchr(pszUri, '%', cchUri);
576	AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
577	do
578	{
579	char szUtf8Seq[8];
580	unsigned cchUtf8Seq = 0;
581	unsigned cchNeeded = 0;
582	size_t cchLeft = &pszUri[cchUri] - pchSrc;
583	do
584	{
585	if (cchLeft >= 3)
586	{
587	char chHigh = pchSrc[1];
588	char chLow = pchSrc[2];
589	if ( RT_C_IS_XDIGIT(chHigh)
590	&& RT_C_IS_XDIGIT(chLow))
591	{
592	uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
593	b <<= 4;
594	b \|= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
595
596	if (!(b & 0x80))
597	{
598	/* We don't want the string to be terminated prematurely. */
599	if (RT_LIKELY(b != 0)) { /* likely */ }
600	else return VERR_URI_ESCAPED_ZERO;
601
602	/* Check that we're not expecting more UTF-8 bytes. */
603	if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
604	else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
605	}
606	/* Are we waiting UTF-8 bytes? */
607	else if (cchNeeded > 0)
608	{
609	if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
610	else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
611
612	szUtf8Seq[cchUtf8Seq++] = (char)b;
613	if (--cchNeeded == 0)
614	{
615	szUtf8Seq[cchUtf8Seq] = '\0';
616	rc = RTStrValidateEncoding(szUtf8Seq);
617	if (RT_FAILURE(rc))
618	return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
619	cchUtf8Seq = 0;
620	}
621	}
622	/* Start a new UTF-8 sequence. */
623	else
624	{
625	if ((b & 0xf8) == 0xf0)
626	cchNeeded = 3;
627	else if ((b & 0xf0) == 0xe0)
628	cchNeeded = 2;
629	else if ((b & 0xe0) == 0xc0)
630	cchNeeded = 1;
631	else
632	return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
633	szUtf8Seq[0] = (char)b;
634	cchUtf8Seq = 1;
635	}
636	pchSrc += 3;
637	cchLeft -= 3;
638	}
639	else
640	return VERR_URI_INVALID_ESCAPE_SEQ;
641	}
642	else
643	return VERR_URI_INVALID_ESCAPE_SEQ;
644	} while (cchLeft > 0 && pchSrc[0] == '%');
645
646	/* Check that we're not expecting more UTF-8 bytes. */
647	if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
648	else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
649
650	/* next */
651	pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
652	} while (pchSrc);
653	}
654
655	pParsed->u32Magic = RTURIPARSED_MAGIC;
656	return VINF_SUCCESS;
657	}
658
659
660	RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
661	{
662	return rtUriParse(pszUri, pParsed);
663	}
664
665
666	RTDECL(char ) RTUriParsedScheme(const char pszUri, PCRTURIPARSED pParsed)
667	{
668	AssertPtrReturn(pszUri, NULL);
669	AssertPtrReturn(pParsed, NULL);
670	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
671	return RTStrDupN(pszUri, pParsed->cchScheme);
672	}
673
674
675	RTDECL(char ) RTUriParsedAuthority(const char pszUri, PCRTURIPARSED pParsed)
676	{
677	AssertPtrReturn(pszUri, NULL);
678	AssertPtrReturn(pParsed, NULL);
679	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
680	if (pParsed->cchAuthority \|\| (pParsed->fFlags & RTURIPARSED_F_HAVE_AUTHORITY))
681	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
682	return NULL;
683	}
684
685
686	RTDECL(char ) RTUriParsedAuthorityUsername(const char pszUri, PCRTURIPARSED pParsed)
687	{
688	AssertPtrReturn(pszUri, NULL);
689	AssertPtrReturn(pParsed, NULL);
690	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
691	if (pParsed->cchAuthorityUsername)
692	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
693	return NULL;
694	}
695
696
697	RTDECL(char ) RTUriParsedAuthorityPassword(const char pszUri, PCRTURIPARSED pParsed)
698	{
699	AssertPtrReturn(pszUri, NULL);
700	AssertPtrReturn(pParsed, NULL);
701	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
702	if (pParsed->cchAuthorityPassword)
703	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
704	return NULL;
705	}
706
707
708	RTDECL(char ) RTUriParsedAuthorityHost(const char pszUri, PCRTURIPARSED pParsed)
709	{
710	AssertPtrReturn(pszUri, NULL);
711	AssertPtrReturn(pParsed, NULL);
712	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
713	if (pParsed->cchAuthorityHost)
714	return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
715	return NULL;
716	}
717
718
719	RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
720	{
721	AssertPtrReturn(pszUri, UINT32_MAX);
722	AssertPtrReturn(pParsed, UINT32_MAX);
723	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
724	return pParsed->uAuthorityPort;
725	}
726
727
728	RTDECL(char ) RTUriParsedPath(const char pszUri, PCRTURIPARSED pParsed)
729	{
730	AssertPtrReturn(pszUri, NULL);
731	AssertPtrReturn(pParsed, NULL);
732	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
733	if (pParsed->cchPath)
734	return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
735	return NULL;
736	}
737
738
739	RTDECL(char ) RTUriParsedQuery(const char pszUri, PCRTURIPARSED pParsed)
740	{
741	AssertPtrReturn(pszUri, NULL);
742	AssertPtrReturn(pParsed, NULL);
743	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
744	if (pParsed->cchQuery)
745	return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
746	return NULL;
747	}
748
749
750	RTDECL(char ) RTUriParsedFragment(const char pszUri, PCRTURIPARSED pParsed)
751	{
752	AssertPtrReturn(pszUri, NULL);
753	AssertPtrReturn(pParsed, NULL);
754	AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
755	if (pParsed->cchFragment)
756	return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
757	return NULL;
758	}
759
760
761	RTDECL(char ) RTUriCreate(const char pszScheme, const char pszAuthority, const char pszPath, const char *pszQuery,
762	const char *pszFragment)
763	{
764	if (!pszScheme) /* Scheme is minimum requirement */
765	return NULL;
766
767	char *pszResult = 0;
768	char *pszAuthority1 = 0;
769	char *pszPath1 = 0;
770	char *pszQuery1 = 0;
771	char *pszFragment1 = 0;
772
773	do
774	{
775	/* Create the percent encoded strings and calculate the necessary uri
776	* length. */
777	size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
778	if (pszAuthority)
779	{
780	pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
781	if (!pszAuthority1)
782	break;
783	cbSize += strlen(pszAuthority1) + 2;
784	}
785	if (pszPath)
786	{
787	pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
788	if (!pszPath1)
789	break;
790	cbSize += strlen(pszPath1);
791	}
792	if (pszQuery)
793	{
794	pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
795	if (!pszQuery1)
796	break;
797	cbSize += strlen(pszQuery1) + 1;
798	}
799	if (pszFragment)
800	{
801	pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
802	if (!pszFragment1)
803	break;
804	cbSize += strlen(pszFragment1) + 1;
805	}
806
807	char pszTmp = pszResult = (char )RTStrAlloc(cbSize);
808	if (!pszResult)
809	break;
810	RT_BZERO(pszTmp, cbSize);
811
812	/* Compose the target uri string. */
813	RTStrCatP(&pszTmp, &cbSize, pszScheme);
814	RTStrCatP(&pszTmp, &cbSize, ":");
815	if (pszAuthority1)
816	{
817	RTStrCatP(&pszTmp, &cbSize, "//");
818	RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
819	}
820	if (pszPath1)
821	{
822	RTStrCatP(&pszTmp, &cbSize, pszPath1);
823	}
824	if (pszQuery1)
825	{
826	RTStrCatP(&pszTmp, &cbSize, "?");
827	RTStrCatP(&pszTmp, &cbSize, pszQuery1);
828	}
829	if (pszFragment1)
830	{
831	RTStrCatP(&pszTmp, &cbSize, "#");
832	RTStrCatP(&pszTmp, &cbSize, pszFragment1);
833	}
834	} while (0);
835
836	/* Cleanup */
837	if (pszAuthority1)
838	RTStrFree(pszAuthority1);
839	if (pszPath1)
840	RTStrFree(pszPath1);
841	if (pszQuery1)
842	RTStrFree(pszQuery1);
843	if (pszFragment1)
844	RTStrFree(pszFragment1);
845
846	return pszResult;
847	}
848
849
850	RTDECL(bool) RTUriIsSchemeMatch(const char pszUri, const char pszScheme)
851	{
852	AssertPtrReturn(pszUri, false);
853	size_t const cchScheme = strlen(pszScheme);
854	return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
855	&& pszUri[cchScheme] == ':';
856	}
857
858
859	RTDECL(int) RTUriFileCreateEx(const char pszPath, uint32_t fPathStyle, char ppszUri, size_t cbUri, size_t pcchUri)
860	{
861	/*
862	* Validate and adjust input. (RTPathParse check pszPath out for us)
863	*/
864	if (pcchUri)
865	{
866	AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
867	*pcchUri = ~(size_t)0;
868	}
869	AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
870	AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
871	if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
872	fPathStyle = RTPATH_STYLE;
873
874	/*
875	* Let the RTPath code parse the stuff (no reason to duplicate path parsing
876	* and get it slightly wrong here).
877	*/
878	RTPATHPARSED ParsedPath;
879	int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle);
880	if (RT_SUCCESS(rc) \|\| rc == VERR_BUFFER_OVERFLOW)
881	{
882	/* Skip leading slashes. */
883	if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
884	{
885	if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
886	while (pszPath[0] == '/' \|\| pszPath[0] == '\\')
887	pszPath++;
888	else
889	while (pszPath[0] == '/')
890	pszPath++;
891	}
892	const size_t cchPath = strlen(pszPath);
893
894	/*
895	* Calculate the encoded length and figure destination buffering.
896	*/
897	static const char s_szPrefix[] = "file:///";
898	size_t const cchPrefix = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
899	size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
900
901	if (pcchUri)
902	*pcchUri = cchEncoded;
903
904	char *pszDst;
905	char *pszFreeMe = NULL;
906	if (!cbUri \|\| *ppszUri == NULL)
907	{
908	cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
909	*ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
910	AssertReturn(pszDst, VERR_NO_STR_MEMORY);
911	}
912	else if (cchEncoded < cbUri)
913	pszDst = *ppszUri;
914	else
915	return VERR_BUFFER_OVERFLOW;
916
917	/*
918	* Construct the URI.
919	*/
920	memcpy(pszDst, s_szPrefix, cchPrefix);
921	pszDst[cchPrefix] = '\0';
922	rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
923	if (RT_SUCCESS(rc))
924	{
925	Assert(strlen(pszDst) == cbUri - 1);
926	if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
927	RTPathChangeToUnixSlashes(pszDst, true /fForce/);
928	return VINF_SUCCESS;
929	}
930
931	AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
932	if (pszFreeMe)
933	RTStrFree(pszFreeMe);
934	}
935	return rc;
936	}
937
938
939	RTDECL(char ) RTUriFileCreate(const char pszPath)
940	{
941	char *pszUri = NULL;
942	int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /cbUri/, NULL /pcchUri/);
943	if (RT_SUCCESS(rc))
944	return pszUri;
945	return NULL;
946	}
947
948
949	RTDECL(int) RTUriFilePathEx(const char pszUri, uint32_t fPathStyle, char ppszPath, size_t cbPath, size_t pcchPath)
950	{
951	/*
952	* Validate and adjust input.
953	*/
954	if (pcchPath)
955	{
956	AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
957	*pcchPath = ~(size_t)0;
958	}
959	AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
960	AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
961	if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
962	fPathStyle = RTPATH_STYLE;
963	AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
964
965	/*
966	* Check that this is a file URI.
967	*/
968	if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
969	{ /* likely */ }
970	else
971	return VERR_URI_NOT_FILE_SCHEME;
972
973	/*
974	* We may have a number of variations here, mostly thanks to
975	* various windows software. First the canonical variations:
976	* - file:///C:/Windows/System32/kernel32.dll
977	* - file:///C\|/Windows/System32/kernel32.dll
978	* - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
979	* - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
980	* - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
981	* - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever)
982	*
983	* Legacy variant without any slashes after the schema:
984	* - file:C:/Windows/System32/kernel32.dll
985	* - file:C\|/Windows/System32%5Ckernel32.dll
986	* - file:~/.bashrc
987	* \--path-/
988	*
989	* Legacy variant with exactly one slashes after the schema:
990	* - file:/C:/Windows/System32%5Ckernel32.dll
991	* - file:/C\|/Windows/System32/kernel32.dll
992	* - file:/usr/bin/env
993	* \---path---/
994	*
995	* Legacy variant with two slashes after the schema and an unescaped DOS path:
996	* - file://C:/Windows/System32\kernel32.dll (**)
997	* - file://C\|/Windows/System32\kernel32.dll
998	* \---path---------------------/
999	* -- authority, with ':' as non-working port separator
1000	*
1001	* Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
1002	* - file:////C:/Windows\System32\user32.dll
1003	*
1004	* Legacy variant with four or more slashes after the schema and an unescaped UNC path:
1005	* - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
1006	* - file://///cifsserver.dev/systemshare/System32\kernel32.dll
1007	* \---path--------------------------------------------/
1008	*
1009	* The the two unescaped variants shouldn't be handed to rtUriParse, which
1010	* is good as we cannot actually handle the one marked by (**). So, handle
1011	* those two special when parsing.
1012	*/
1013	RTURIPARSED Parsed;
1014	int rc;
1015	size_t cSlashes = 0;
1016	while (pszUri[5 + cSlashes] == '/')
1017	cSlashes++;
1018	if ( (cSlashes == 2 \|\| cSlashes == 4)
1019	&& RT_C_IS_ALPHA(pszUri[5 + cSlashes])
1020	&& (pszUri[5 + cSlashes + 1] == ':' \|\| pszUri[5 + cSlashes + 1] == '\|'))
1021	{
1022	RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
1023	Parsed.offPath = 5 + cSlashes;
1024	Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1025	rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1026	}
1027	else if (cSlashes >= 4)
1028	{
1029	RT_ZERO(Parsed);
1030	Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
1031	Parsed.offPath = 5 + cSlashes - 2;
1032	Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1033	rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1034	}
1035	else
1036	rc = rtUriParse(pszUri, &Parsed);
1037	if (RT_SUCCESS(rc))
1038	{
1039	/*
1040	* Ignore localhost as hostname (it's implicit).
1041	*/
1042	static char const s_szLocalhost[] = "localhost";
1043	if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
1044	&& RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
1045	{
1046	Parsed.cchAuthorityHost = 0;
1047	Parsed.cchAuthority = 0;
1048	}
1049
1050	/*
1051	* Ignore leading path slash/separator if we detect a DOS drive letter
1052	* and we don't have a host name.
1053	*/
1054	if ( Parsed.cchPath >= 3
1055	&& Parsed.cchAuthorityHost == 0
1056	&& pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */
1057	&& ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */
1058	\|\| pszUri[Parsed.offPath + 2] == '\|') /* Colon alternative. */
1059	&& RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
1060	{
1061	Parsed.offPath++;
1062	Parsed.cchPath--;
1063	}
1064
1065	/*
1066	* Calculate the size of the encoded result.
1067	*
1068	* Since we're happily returning "C:/Windows/System32/kernel.dll"
1069	* style paths when the caller requested UNIX style paths, we will
1070	* return straight UNC paths too ("//cifsserver/share/dir/file").
1071	*/
1072	size_t cchDecodedHost = 0;
1073	size_t cbResult;
1074	if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1075	{
1076	cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1077	cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
1078	}
1079	else
1080	{
1081	cchDecodedHost = 0;
1082	cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
1083	}
1084	if (pcchPath)
1085	*pcchPath = cbResult - 1;
1086	if (cbResult > 1)
1087	{
1088	/*
1089	* Prepare the necessary buffer space for the result.
1090	*/
1091	char *pszDst;
1092	char *pszFreeMe = NULL;
1093	if (!cbPath \|\| *ppszPath == NULL)
1094	{
1095	cbPath = RT_MAX(cbPath, cbResult);
1096	*ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
1097	AssertReturn(pszDst, VERR_NO_STR_MEMORY);
1098	}
1099	else if (cbResult <= cbPath)
1100	pszDst = *ppszPath;
1101	else
1102	return VERR_BUFFER_OVERFLOW;
1103
1104	/*
1105	* Compose the result.
1106	*/
1107	if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1108	{
1109	rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
1110	pszDst, cchDecodedHost + 1);
1111	Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
1112	if (RT_SUCCESS(rc))
1113	rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
1114	&pszDst[cchDecodedHost], cbResult - cchDecodedHost);
1115	Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
1116	}
1117	else
1118	{
1119	memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1120	memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
1121	pszDst[cbResult - 1] = '\0';
1122	}
1123	if (RT_SUCCESS(rc))
1124	{
1125	/*
1126	* Convert colon DOS driver letter colon alternative.
1127	* We do this regardless of the desired path style.
1128	*/
1129	if ( RT_C_IS_ALPHA(pszDst[0])
1130	&& pszDst[1] == '\|')
1131	pszDst[1] = ':';
1132
1133	/*
1134	* Fix slashes.
1135	*/
1136	if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
1137	RTPathChangeToDosSlashes(pszDst, true);
1138	else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
1139	RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
1140	else
1141	AssertFailed();
1142	return rc;
1143	}
1144
1145	/* bail out */
1146	RTStrFree(pszFreeMe);
1147	}
1148	else
1149	rc = VERR_PATH_ZERO_LENGTH;
1150	}
1151	return rc;
1152	}
1153
1154
1155	RTDECL(char ) RTUriFilePath(const char pszUri)
1156	{
1157	char *pszPath = NULL;
1158	int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /cbPath/, NULL /pcchPath/);
1159	if (RT_SUCCESS(rc))
1160	return pszPath;
1161	return NULL;
1162	}
1163

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 62461

Download in other formats: