uri.c@ 102654

Last change on this file since 102654 was 95312, checked in by vboxsync, 2 years ago
libs/{curl,libxml2}: OSE export fixes, bugref:8515
Property svn:eol-style set to `native`
File size: 65.2 KB

Line
1	/**
2	* uri.c: set of generic URI related routines
3	*
4	* Reference: RFCs 3986, 2732 and 2373
5	*
6	* See Copyright for the status of this software.
7	*
8	* daniel@veillard.com
9	*/
10
11	#define IN_LIBXML
12	#include "libxml.h"
13
14	#include <limits.h>
15	#include <string.h>
16
17	#include <libxml/xmlmemory.h>
18	#include <libxml/uri.h>
19	#include <libxml/globals.h>
20	#include <libxml/xmlerror.h>
21
22	/**
23	* MAX_URI_LENGTH:
24	*
25	* The definition of the URI regexp in the above RFC has no size limit
26	* In practice they are usually relatively short except for the
27	* data URI scheme as defined in RFC 2397. Even for data URI the usual
28	* maximum size before hitting random practical limits is around 64 KB
29	* and 4KB is usually a maximum admitted limit for proper operations.
30	* The value below is more a security limit than anything else and
31	* really should never be hit by 'normal' operations
32	* Set to 1 MByte in 2012, this is only enforced on output
33	*/
/* Parenthesized so the constant composes safely inside larger expressions. */
#define MAX_URI_LENGTH (1024 * 1024)
35
36	static void
37	xmlURIErrMemory(const char *extra)
38	{
39	if (extra)
40	__xmlRaiseError(NULL, NULL, NULL,
41	NULL, NULL, XML_FROM_URI,
42	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43	extra, NULL, NULL, 0, 0,
44	"Memory allocation failed : %s\n", extra);
45	else
46	__xmlRaiseError(NULL, NULL, NULL,
47	NULL, NULL, XML_FROM_URI,
48	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49	NULL, NULL, NULL, 0, 0,
50	"Memory allocation failed\n");
51	}
52
53	static void xmlCleanURI(xmlURIPtr uri);
54
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 *
 * Except for IS_UNWISE, the macros below take a character VALUE.
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * Unlike the other legacy macros this one takes a POINTER to the
 * character; it additionally matches "[" and "]".
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
122
/*
 * Skip to next pointer char, handle escaped sequences: a '%' is assumed
 * to start a three-character escape and is skipped as a unit.
 * The argument is an lvalue pointer; it is parenthesized so that
 * expressions such as NEXT(*pp) advance the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
128
129	/*
130	* Productions from the spec.
131	*
132	* authority = server \| reg_name
133	* reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
134	* ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
135	*
136	* path = [ abs_path \| opaque_part ]
137	*/
138
/* Duplicate the first @n bytes of @s via xmlStrndup, as a plain char *. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
140
141	/************************************************************************
142	* *
143	* RFC 3986 parser *
144	* *
145	************************************************************************/
146
/*
 * All ISA_* macros take a POINTER to the character to classify, so that
 * ISA_PCT_ENCODED can look ahead at the two following characters.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first mismatch, so the lookahead never
 * reads past a terminating NUL.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
196
197	/**
198	* xmlParse3986Scheme:
199	* @uri: pointer to an URI structure
200	* @str: pointer to the string to analyze
201	*
202	* Parse an URI scheme
203	*
204	* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205	*
206	* Returns 0 or the error code
207	*/
208	static int
209	xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210	const char *cur;
211
212	if (str == NULL)
213	return(-1);
214
215	cur = *str;
216	if (!ISA_ALPHA(cur))
217	return(2);
218	cur++;
219	while (ISA_ALPHA(cur) \|\| ISA_DIGIT(cur) \|\|
220	(cur == '+') \|\| (cur == '-') \|\| (*cur == '.')) cur++;
221	if (uri != NULL) {
222	if (uri->scheme != NULL) xmlFree(uri->scheme);
223	uri->scheme = STRNDUP(str, cur - str);
224	}
225	*str = cur;
226	return(0);
227	}
228
229	/**
230	* xmlParse3986Fragment:
231	* @uri: pointer to an URI structure
232	* @str: pointer to the string to analyze
233	*
234	* Parse the query part of an URI
235	*
236	* fragment = *( pchar / "/" / "?" )
237	* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238	* in the fragment identifier but this is used very broadly for
239	* xpointer scheme selection, so we are allowing it here to not break
240	* for example all the DocBook processing chains.
241	*
242	* Returns 0 or the error code
243	*/
244	static int
245	xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246	{
247	const char *cur;
248
249	if (str == NULL)
250	return (-1);
251
252	cur = *str;
253
254	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
255	(cur == '[') \|\| (cur == ']') \|\|
256	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257	NEXT(cur);
258	if (uri != NULL) {
259	if (uri->fragment != NULL)
260	xmlFree(uri->fragment);
261	if (uri->cleanup & 2)
262	uri->fragment = STRNDUP(str, cur - str);
263	else
264	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
265	}
266	*str = cur;
267	return (0);
268	}
269
270	/**
271	* xmlParse3986Query:
272	* @uri: pointer to an URI structure
273	* @str: pointer to the string to analyze
274	*
275	* Parse the query part of an URI
276	*
277	* query = *uric
278	*
279	* Returns 0 or the error code
280	*/
281	static int
282	xmlParse3986Query(xmlURIPtr uri, const char **str)
283	{
284	const char *cur;
285
286	if (str == NULL)
287	return (-1);
288
289	cur = *str;
290
291	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
292	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293	NEXT(cur);
294	if (uri != NULL) {
295	if (uri->query != NULL)
296	xmlFree(uri->query);
297	if (uri->cleanup & 2)
298	uri->query = STRNDUP(str, cur - str);
299	else
300	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
301
302	/* Save the raw bytes of the query as well.
303	* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304	*/
305	if (uri->query_raw != NULL)
306	xmlFree (uri->query_raw);
307	uri->query_raw = STRNDUP (str, cur - str);
308	}
309	*str = cur;
310	return (0);
311	}
312
313	/**
314	* xmlParse3986Port:
315	* @uri: pointer to an URI structure
316	* @str: the string to analyze
317	*
318	* Parse a port part and fills in the appropriate fields
319	* of the @uri structure
320	*
321	* port = *DIGIT
322	*
323	* Returns 0 or the error code
324	*/
325	static int
326	xmlParse3986Port(xmlURIPtr uri, const char **str)
327	{
328	const char cur = str;
329	int port = 0;
330
331	if (ISA_DIGIT(cur)) {
332	while (ISA_DIGIT(cur)) {
333	int digit = *cur - '0';
334
335	if (port > INT_MAX / 10)
336	return(1);
337	port *= 10;
338	if (port > INT_MAX - digit)
339	return(1);
340	port += digit;
341
342	cur++;
343	}
344	if (uri != NULL)
345	uri->port = port;
346	*str = cur;
347	return(0);
348	}
349	return(1);
350	}
351
352	/**
353	* xmlParse3986Userinfo:
354	* @uri: pointer to an URI structure
355	* @str: the string to analyze
356	*
357	* Parse an user information part and fills in the appropriate fields
358	* of the @uri structure
359	*
360	* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
361	*
362	* Returns 0 or the error code
363	*/
364	static int
365	xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366	{
367	const char *cur;
368
369	cur = *str;
370	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\|
371	ISA_SUB_DELIM(cur) \|\| (*cur == ':'))
372	NEXT(cur);
373	if (*cur == '@') {
374	if (uri != NULL) {
375	if (uri->user != NULL) xmlFree(uri->user);
376	if (uri->cleanup & 2)
377	uri->user = STRNDUP(str, cur - str);
378	else
379	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
380	}
381	*str = cur;
382	return(0);
383	}
384	return(1);
385	}
386
387	/**
388	* xmlParse3986DecOctet:
389	* @str: the string to analyze
390	*
391	* dec-octet = DIGIT ; 0-9
392	* / %x31-39 DIGIT ; 10-99
393	* / "1" 2DIGIT ; 100-199
394	* / "2" %x30-34 DIGIT ; 200-249
395	* / "25" %x30-35 ; 250-255
396	*
397	* Skip a dec-octet.
398	*
399	* Returns 0 if found and skipped, 1 otherwise
400	*/
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur+1))
        cur++;                                  /* 0-9 */
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        cur += 2;                               /* 10-99 */
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;                               /* 100-199 */
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;                               /* 200-249 */
    /*
     * 250-255: the upper bound must be checked on the THIRD digit;
     * testing the second one again would let 256-259 through.
     */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        cur += 3;
    else
        return(1);
    *str = cur;
    return(0);
}
424	/**
425	* xmlParse3986Host:
426	* @uri: pointer to an URI structure
427	* @str: the string to analyze
428	*
429	* Parse an host part and fills in the appropriate fields
430	* of the @uri structure
431	*
432	* host = IP-literal / IPv4address / reg-name
433	* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
434	* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
435	* reg-name = *( unreserved / pct-encoded / sub-delims )
436	*
437	* Returns 0 or the error code
438	*/
439	static int
440	xmlParse3986Host(xmlURIPtr uri, const char **str)
441	{
442	const char cur = str;
443	const char *host;
444
445	host = cur;
446	/*
447	* IPv6 and future addressing scheme are enclosed between brackets
448	*/
449	if (*cur == '[') {
450	cur++;
451	while ((cur != ']') && (cur != 0))
452	cur++;
453	if (*cur != ']')
454	return(1);
455	cur++;
456	goto found;
457	}
458	/*
459	* try to parse an IPv4
460	*/
461	if (ISA_DIGIT(cur)) {
462	if (xmlParse3986DecOctet(&cur) != 0)
463	goto not_ipv4;
464	if (*cur != '.')
465	goto not_ipv4;
466	cur++;
467	if (xmlParse3986DecOctet(&cur) != 0)
468	goto not_ipv4;
469	if (*cur != '.')
470	goto not_ipv4;
471	if (xmlParse3986DecOctet(&cur) != 0)
472	goto not_ipv4;
473	if (*cur != '.')
474	goto not_ipv4;
475	if (xmlParse3986DecOctet(&cur) != 0)
476	goto not_ipv4;
477	goto found;
478	not_ipv4:
479	cur = *str;
480	}
481	/*
482	* then this should be a hostname which can be empty
483	*/
484	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\| ISA_SUB_DELIM(cur))
485	NEXT(cur);
486	found:
487	if (uri != NULL) {
488	if (uri->authority != NULL) xmlFree(uri->authority);
489	uri->authority = NULL;
490	if (uri->server != NULL) xmlFree(uri->server);
491	if (cur != host) {
492	if (uri->cleanup & 2)
493	uri->server = STRNDUP(host, cur - host);
494	else
495	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496	} else
497	uri->server = NULL;
498	}
499	*str = cur;
500	return(0);
501	}
502
503	/**
504	* xmlParse3986Authority:
505	* @uri: pointer to an URI structure
506	* @str: the string to analyze
507	*
508	* Parse an authority part and fills in the appropriate fields
509	* of the @uri structure
510	*
511	* authority = [ userinfo "@" ] host [ ":" port ]
512	*
513	* Returns 0 or the error code
514	*/
515	static int
516	xmlParse3986Authority(xmlURIPtr uri, const char **str)
517	{
518	const char *cur;
519	int ret;
520
521	cur = *str;
522	/*
523	* try to parse an userinfo and check for the trailing @
524	*/
525	ret = xmlParse3986Userinfo(uri, &cur);
526	if ((ret != 0) \|\| (*cur != '@'))
527	cur = *str;
528	else
529	cur++;
530	ret = xmlParse3986Host(uri, &cur);
531	if (ret != 0) return(ret);
532	if (*cur == ':') {
533	cur++;
534	ret = xmlParse3986Port(uri, &cur);
535	if (ret != 0) return(ret);
536	}
537	*str = cur;
538	return(0);
539	}
540
541	/**
542	* xmlParse3986Segment:
543	* @str: the string to analyze
544	* @forbid: an optional forbidden character
545	* @empty: allow an empty segment
546	*
547	* Parse a segment and fills in the appropriate fields
548	* of the @uri structure
549	*
550	* segment = *pchar
551	* segment-nz = 1*pchar
552	* segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
553	* ; non-zero-length segment without any colon ":"
554	*
555	* Returns 0 or the error code
556	*/
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    /* Nothing segment-like here: acceptable only if empty is allowed. */
    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    /* Consume pchars until a non-pchar or the forbidden character. */
    for (;;) {
        if (!ISA_PCHAR(pos))
            break;
        if (*pos == forbid)
            break;
        NEXT(pos);
    }

    *str = pos;
    return (0);
}
573
574	/**
575	* xmlParse3986PathAbEmpty:
576	* @uri: pointer to an URI structure
577	* @str: the string to analyze
578	*
579	* Parse an path absolute or empty and fills in the appropriate fields
580	* of the @uri structure
581	*
582	* path-abempty = *( "/" segment )
583	*
584	* Returns 0 or the error code
585	*/
586	static int
587	xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588	{
589	const char *cur;
590	int ret;
591
592	cur = *str;
593
594	while (*cur == '/') {
595	cur++;
596	ret = xmlParse3986Segment(&cur, 0, 1);
597	if (ret != 0) return(ret);
598	}
599	if (uri != NULL) {
600	if (uri->path != NULL) xmlFree(uri->path);
601	if (*str != cur) {
602	if (uri->cleanup & 2)
603	uri->path = STRNDUP(str, cur - str);
604	else
605	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
606	} else {
607	uri->path = NULL;
608	}
609	}
610	*str = cur;
611	return (0);
612	}
613
614	/**
615	* xmlParse3986PathAbsolute:
616	* @uri: pointer to an URI structure
617	* @str: the string to analyze
618	*
619	* Parse an path absolute and fills in the appropriate fields
620	* of the @uri structure
621	*
622	* path-absolute = "/" [ segment-nz *( "/" segment ) ]
623	*
624	* Returns 0 or the error code
625	*/
626	static int
627	xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628	{
629	const char *cur;
630	int ret;
631
632	cur = *str;
633
634	if (*cur != '/')
635	return(1);
636	cur++;
637	ret = xmlParse3986Segment(&cur, 0, 0);
638	if (ret == 0) {
639	while (*cur == '/') {
640	cur++;
641	ret = xmlParse3986Segment(&cur, 0, 1);
642	if (ret != 0) return(ret);
643	}
644	}
645	if (uri != NULL) {
646	if (uri->path != NULL) xmlFree(uri->path);
647	if (cur != *str) {
648	if (uri->cleanup & 2)
649	uri->path = STRNDUP(str, cur - str);
650	else
651	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
652	} else {
653	uri->path = NULL;
654	}
655	}
656	*str = cur;
657	return (0);
658	}
659
660	/**
661	* xmlParse3986PathRootless:
662	* @uri: pointer to an URI structure
663	* @str: the string to analyze
664	*
665	* Parse an path without root and fills in the appropriate fields
666	* of the @uri structure
667	*
668	* path-rootless = segment-nz *( "/" segment )
669	*
670	* Returns 0 or the error code
671	*/
672	static int
673	xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674	{
675	const char *cur;
676	int ret;
677
678	cur = *str;
679
680	ret = xmlParse3986Segment(&cur, 0, 0);
681	if (ret != 0) return(ret);
682	while (*cur == '/') {
683	cur++;
684	ret = xmlParse3986Segment(&cur, 0, 1);
685	if (ret != 0) return(ret);
686	}
687	if (uri != NULL) {
688	if (uri->path != NULL) xmlFree(uri->path);
689	if (cur != *str) {
690	if (uri->cleanup & 2)
691	uri->path = STRNDUP(str, cur - str);
692	else
693	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
694	} else {
695	uri->path = NULL;
696	}
697	}
698	*str = cur;
699	return (0);
700	}
701
702	/**
703	* xmlParse3986PathNoScheme:
704	* @uri: pointer to an URI structure
705	* @str: the string to analyze
706	*
707	* Parse an path which is not a scheme and fills in the appropriate fields
708	* of the @uri structure
709	*
710	* path-noscheme = segment-nz-nc *( "/" segment )
711	*
712	* Returns 0 or the error code
713	*/
714	static int
715	xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716	{
717	const char *cur;
718	int ret;
719
720	cur = *str;
721
722	ret = xmlParse3986Segment(&cur, ':', 0);
723	if (ret != 0) return(ret);
724	while (*cur == '/') {
725	cur++;
726	ret = xmlParse3986Segment(&cur, 0, 1);
727	if (ret != 0) return(ret);
728	}
729	if (uri != NULL) {
730	if (uri->path != NULL) xmlFree(uri->path);
731	if (cur != *str) {
732	if (uri->cleanup & 2)
733	uri->path = STRNDUP(str, cur - str);
734	else
735	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
736	} else {
737	uri->path = NULL;
738	}
739	}
740	*str = cur;
741	return (0);
742	}
743
744	/**
745	* xmlParse3986HierPart:
746	* @uri: pointer to an URI structure
747	* @str: the string to analyze
748	*
749	* Parse an hierarchical part and fills in the appropriate fields
750	* of the @uri structure
751	*
752	* hier-part = "//" authority path-abempty
753	* / path-absolute
754	* / path-rootless
755	* / path-empty
756	*
757	* Returns 0 or the error code
758	*/
759	static int
760	xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761	{
762	const char *cur;
763	int ret;
764
765	cur = *str;
766
767	if ((cur == '/') && ((cur + 1) == '/')) {
768	cur += 2;
769	ret = xmlParse3986Authority(uri, &cur);
770	if (ret != 0) return(ret);
771	if (uri->server == NULL)
772	uri->port = -1;
773	ret = xmlParse3986PathAbEmpty(uri, &cur);
774	if (ret != 0) return(ret);
775	*str = cur;
776	return(0);
777	} else if (*cur == '/') {
778	ret = xmlParse3986PathAbsolute(uri, &cur);
779	if (ret != 0) return(ret);
780	} else if (ISA_PCHAR(cur)) {
781	ret = xmlParse3986PathRootless(uri, &cur);
782	if (ret != 0) return(ret);
783	} else {
784	/* path-empty is effectively empty */
785	if (uri != NULL) {
786	if (uri->path != NULL) xmlFree(uri->path);
787	uri->path = NULL;
788	}
789	}
790	*str = cur;
791	return (0);
792	}
793
794	/**
795	* xmlParse3986RelativeRef:
796	* @uri: pointer to an URI structure
797	* @str: the string to analyze
798	*
799	* Parse an URI string and fills in the appropriate fields
800	* of the @uri structure
801	*
802	* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
803	* relative-part = "//" authority path-abempty
804	* / path-absolute
805	* / path-noscheme
806	* / path-empty
807	*
808	* Returns 0 or the error code
809	*/
810	static int
811	xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812	int ret;
813
814	if ((str == '/') && ((str + 1) == '/')) {
815	str += 2;
816	ret = xmlParse3986Authority(uri, &str);
817	if (ret != 0) return(ret);
818	ret = xmlParse3986PathAbEmpty(uri, &str);
819	if (ret != 0) return(ret);
820	} else if (*str == '/') {
821	ret = xmlParse3986PathAbsolute(uri, &str);
822	if (ret != 0) return(ret);
823	} else if (ISA_PCHAR(str)) {
824	ret = xmlParse3986PathNoScheme(uri, &str);
825	if (ret != 0) return(ret);
826	} else {
827	/* path-empty is effectively empty */
828	if (uri != NULL) {
829	if (uri->path != NULL) xmlFree(uri->path);
830	uri->path = NULL;
831	}
832	}
833
834	if (*str == '?') {
835	str++;
836	ret = xmlParse3986Query(uri, &str);
837	if (ret != 0) return(ret);
838	}
839	if (*str == '#') {
840	str++;
841	ret = xmlParse3986Fragment(uri, &str);
842	if (ret != 0) return(ret);
843	}
844	if (*str != 0) {
845	xmlCleanURI(uri);
846	return(1);
847	}
848	return(0);
849	}
850
851
852	/**
853	* xmlParse3986URI:
854	* @uri: pointer to an URI structure
855	* @str: the string to analyze
856	*
857	* Parse an URI string and fills in the appropriate fields
858	* of the @uri structure
859	*
860	* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861	*
862	* Returns 0 or the error code
863	*/
864	static int
865	xmlParse3986URI(xmlURIPtr uri, const char *str) {
866	int ret;
867
868	ret = xmlParse3986Scheme(uri, &str);
869	if (ret != 0) return(ret);
870	if (*str != ':') {
871	return(1);
872	}
873	str++;
874	ret = xmlParse3986HierPart(uri, &str);
875	if (ret != 0) return(ret);
876	if (*str == '?') {
877	str++;
878	ret = xmlParse3986Query(uri, &str);
879	if (ret != 0) return(ret);
880	}
881	if (*str == '#') {
882	str++;
883	ret = xmlParse3986Fragment(uri, &str);
884	if (ret != 0) return(ret);
885	}
886	if (*str != 0) {
887	xmlCleanURI(uri);
888	return(1);
889	}
890	return(0);
891	}
892
893	/**
894	* xmlParse3986URIReference:
895	* @uri: pointer to an URI structure
896	* @str: the string to analyze
897	*
898	* Parse an URI reference string and fills in the appropriate fields
899	* of the @uri structure
900	*
901	* URI-reference = URI / relative-ref
902	*
903	* Returns 0 or the error code
904	*/
905	static int
906	xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907	int ret;
908
909	if (str == NULL)
910	return(-1);
911	xmlCleanURI(uri);
912
913	/*
914	* Try first to parse absolute refs, then fallback to relative if
915	* it fails.
916	*/
917	ret = xmlParse3986URI(uri, str);
918	if (ret != 0) {
919	xmlCleanURI(uri);
920	ret = xmlParse3986RelativeRef(uri, str);
921	if (ret != 0) {
922	xmlCleanURI(uri);
923	return(ret);
924	}
925	}
926	return(0);
927	}
928
929	/**
930	* xmlParseURI:
931	* @str: the URI string to analyze
932	*
933	* Parse an URI based on RFC 3986
934	*
935	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
936	*
937	* Returns a newly built xmlURIPtr or NULL in case of error
938	*/
939	xmlURIPtr
940	xmlParseURI(const char *str) {
941	xmlURIPtr uri;
942	int ret;
943
944	if (str == NULL)
945	return(NULL);
946	uri = xmlCreateURI();
947	if (uri != NULL) {
948	ret = xmlParse3986URIReference(uri, str);
949	if (ret) {
950	xmlFreeURI(uri);
951	return(NULL);
952	}
953	}
954	return(uri);
955	}
956
957	/**
958	* xmlParseURIReference:
959	* @uri: pointer to an URI structure
960	* @str: the string to analyze
961	*
962	* Parse an URI reference string based on RFC 3986 and fills in the
963	* appropriate fields of the @uri structure
964	*
965	* URI-reference = URI / relative-ref
966	*
967	* Returns 0 or the error code
968	*/
969	int
970	xmlParseURIReference(xmlURIPtr uri, const char *str) {
971	return(xmlParse3986URIReference(uri, str));
972	}
973
974	/**
975	* xmlParseURIRaw:
976	* @str: the URI string to analyze
977	* @raw: if 1 unescaping of URI pieces are disabled
978	*
979	* Parse an URI but allows to keep intact the original fragments.
980	*
981	* URI-reference = URI / relative-ref
982	*
983	* Returns a newly built xmlURIPtr or NULL in case of error
984	*/
985	xmlURIPtr
986	xmlParseURIRaw(const char *str, int raw) {
987	xmlURIPtr uri;
988	int ret;
989
990	if (str == NULL)
991	return(NULL);
992	uri = xmlCreateURI();
993	if (uri != NULL) {
994	if (raw) {
995	uri->cleanup \|= 2;
996	}
997	ret = xmlParseURIReference(uri, str);
998	if (ret) {
999	xmlFreeURI(uri);
1000	return(NULL);
1001	}
1002	}
1003	return(uri);
1004	}
1005
1006	/************************************************************************
1007	* *
1008	* Generic URI structure functions *
1009	* *
1010	************************************************************************/
1011
1012	/**
1013	* xmlCreateURI:
1014	*
1015	* Simply creates an empty xmlURI
1016	*
1017	* Returns the new structure or NULL in case of error
1018	*/
1019	xmlURIPtr
1020	xmlCreateURI(void) {
1021	xmlURIPtr ret;
1022
1023	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024	if (ret == NULL) {
1025	xmlURIErrMemory("creating URI structure\n");
1026	return(NULL);
1027	}
1028	memset(ret, 0, sizeof(xmlURI));
1029	return(ret);
1030	}
1031
1032	/**
1033	* xmlSaveUriRealloc:
1034	*
1035	* Function to handle properly a reallocation when saving an URI
1036	* Also imposes some limit on the length of an URI string output
1037	*/
1038	static xmlChar *
1039	xmlSaveUriRealloc(xmlChar ret, int max) {
1040	xmlChar *temp;
1041	int tmp;
1042
1043	if (*max > MAX_URI_LENGTH) {
1044	xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045	return(NULL);
1046	}
1047	tmp = max 2;
1048	temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049	if (temp == NULL) {
1050	xmlURIErrMemory("saving URI\n");
1051	return(NULL);
1052	}
1053	*max = tmp;
1054	return(temp);
1055	}
1056
1057	/**
1058	* xmlSaveUri:
1059	* @uri: pointer to an xmlURI
1060	*
1061	* Save the URI as an escaped string
1062	*
1063	* Returns a new string (to be deallocated by caller)
1064	*/
1065	xmlChar *
1066	xmlSaveUri(xmlURIPtr uri) {
1067	xmlChar *ret = NULL;
1068	xmlChar *temp;
1069	const char *p;
1070	int len;
1071	int max;
1072
1073	if (uri == NULL) return(NULL);
1074
1075
1076	max = 80;
1077	ret = (xmlChar ) xmlMallocAtomic((max + 1) sizeof(xmlChar));
1078	if (ret == NULL) {
1079	xmlURIErrMemory("saving URI\n");
1080	return(NULL);
1081	}
1082	len = 0;
1083
1084	if (uri->scheme != NULL) {
1085	p = uri->scheme;
1086	while (*p != 0) {
1087	if (len >= max) {
1088	temp = xmlSaveUriRealloc(ret, &max);
1089	if (temp == NULL) goto mem_error;
1090	ret = temp;
1091	}
1092	ret[len++] = *p++;
1093	}
1094	if (len >= max) {
1095	temp = xmlSaveUriRealloc(ret, &max);
1096	if (temp == NULL) goto mem_error;
1097	ret = temp;
1098	}
1099	ret[len++] = ':';
1100	}
1101	if (uri->opaque != NULL) {
1102	p = uri->opaque;
1103	while (*p != 0) {
1104	if (len + 3 >= max) {
1105	temp = xmlSaveUriRealloc(ret, &max);
1106	if (temp == NULL) goto mem_error;
1107	ret = temp;
1108	}
1109	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
1110	ret[len++] = *p++;
1111	else {
1112	int val = (unsigned char )p++;
1113	int hi = val / 0x10, lo = val % 0x10;
1114	ret[len++] = '%';
1115	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117	}
1118	}
1119	} else {
1120	if ((uri->server != NULL) \|\| (uri->port == -1)) {
1121	if (len + 3 >= max) {
1122	temp = xmlSaveUriRealloc(ret, &max);
1123	if (temp == NULL) goto mem_error;
1124	ret = temp;
1125	}
1126	ret[len++] = '/';
1127	ret[len++] = '/';
1128	if (uri->user != NULL) {
1129	p = uri->user;
1130	while (*p != 0) {
1131	if (len + 3 >= max) {
1132	temp = xmlSaveUriRealloc(ret, &max);
1133	if (temp == NULL) goto mem_error;
1134	ret = temp;
1135	}
1136	if ((IS_UNRESERVED(*(p))) \|\|
1137	(((p) == ';')) \|\| (((p) == ':')) \|\|
1138	(((p) == '&')) \|\| (((p) == '=')) \|\|
1139	(((p) == '+')) \|\| (((p) == '$')) \|\|
1140	((*(p) == ',')))
1141	ret[len++] = *p++;
1142	else {
1143	int val = (unsigned char )p++;
1144	int hi = val / 0x10, lo = val % 0x10;
1145	ret[len++] = '%';
1146	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148	}
1149	}
1150	if (len + 3 >= max) {
1151	temp = xmlSaveUriRealloc(ret, &max);
1152	if (temp == NULL) goto mem_error;
1153	ret = temp;
1154	}
1155	ret[len++] = '@';
1156	}
1157	if (uri->server != NULL) {
1158	p = uri->server;
1159	while (*p != 0) {
1160	if (len >= max) {
1161	temp = xmlSaveUriRealloc(ret, &max);
1162	if (temp == NULL) goto mem_error;
1163	ret = temp;
1164	}
1165	ret[len++] = *p++;
1166	}
1167	if (uri->port > 0) {
1168	if (len + 10 >= max) {
1169	temp = xmlSaveUriRealloc(ret, &max);
1170	if (temp == NULL) goto mem_error;
1171	ret = temp;
1172	}
1173	len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1174	}
1175	}
1176	} else if (uri->authority != NULL) {
1177	if (len + 3 >= max) {
1178	temp = xmlSaveUriRealloc(ret, &max);
1179	if (temp == NULL) goto mem_error;
1180	ret = temp;
1181	}
1182	ret[len++] = '/';
1183	ret[len++] = '/';
1184	p = uri->authority;
1185	while (*p != 0) {
1186	if (len + 3 >= max) {
1187	temp = xmlSaveUriRealloc(ret, &max);
1188	if (temp == NULL) goto mem_error;
1189	ret = temp;
1190	}
1191	if ((IS_UNRESERVED(*(p))) \|\|
1192	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
1193	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1194	(((p) == '=')) \|\| (((p) == '+')))
1195	ret[len++] = *p++;
1196	else {
1197	int val = (unsigned char )p++;
1198	int hi = val / 0x10, lo = val % 0x10;
1199	ret[len++] = '%';
1200	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202	}
1203	}
1204	} else if (uri->scheme != NULL) {
1205	if (len + 3 >= max) {
1206	temp = xmlSaveUriRealloc(ret, &max);
1207	if (temp == NULL) goto mem_error;
1208	ret = temp;
1209	}
1210	}
1211	if (uri->path != NULL) {
1212	p = uri->path;
1213	/*
1214	* the colon in file:///d: should not be escaped or
1215	* Windows accesses fail later.
1216	*/
1217	if ((uri->scheme != NULL) &&
1218	(p[0] == '/') &&
1219	(((p[1] >= 'a') && (p[1] <= 'z')) \|\|
1220	((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1221	(p[2] == ':') &&
1222	(xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1223	if (len + 3 >= max) {
1224	temp = xmlSaveUriRealloc(ret, &max);
1225	if (temp == NULL) goto mem_error;
1226	ret = temp;
1227	}
1228	ret[len++] = *p++;
1229	ret[len++] = *p++;
1230	ret[len++] = *p++;
1231	}
1232	while (*p != 0) {
1233	if (len + 3 >= max) {
1234	temp = xmlSaveUriRealloc(ret, &max);
1235	if (temp == NULL) goto mem_error;
1236	ret = temp;
1237	}
1238	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
1239	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1240	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
1241	((*(p) == ',')))
1242	ret[len++] = *p++;
1243	else {
1244	int val = (unsigned char )p++;
1245	int hi = val / 0x10, lo = val % 0x10;
1246	ret[len++] = '%';
1247	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1248	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1249	}
1250	}
1251	}
1252	if (uri->query_raw != NULL) {
1253	if (len + 1 >= max) {
1254	temp = xmlSaveUriRealloc(ret, &max);
1255	if (temp == NULL) goto mem_error;
1256	ret = temp;
1257	}
1258	ret[len++] = '?';
1259	p = uri->query_raw;
1260	while (*p != 0) {
1261	if (len + 1 >= max) {
1262	temp = xmlSaveUriRealloc(ret, &max);
1263	if (temp == NULL) goto mem_error;
1264	ret = temp;
1265	}
1266	ret[len++] = *p++;
1267	}
1268	} else if (uri->query != NULL) {
1269	if (len + 3 >= max) {
1270	temp = xmlSaveUriRealloc(ret, &max);
1271	if (temp == NULL) goto mem_error;
1272	ret = temp;
1273	}
1274	ret[len++] = '?';
1275	p = uri->query;
1276	while (*p != 0) {
1277	if (len + 3 >= max) {
1278	temp = xmlSaveUriRealloc(ret, &max);
1279	if (temp == NULL) goto mem_error;
1280	ret = temp;
1281	}
1282	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1283	ret[len++] = *p++;
1284	else {
1285	int val = (unsigned char )p++;
1286	int hi = val / 0x10, lo = val % 0x10;
1287	ret[len++] = '%';
1288	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1289	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1290	}
1291	}
1292	}
1293	}
1294	if (uri->fragment != NULL) {
1295	if (len + 3 >= max) {
1296	temp = xmlSaveUriRealloc(ret, &max);
1297	if (temp == NULL) goto mem_error;
1298	ret = temp;
1299	}
1300	ret[len++] = '#';
1301	p = uri->fragment;
1302	while (*p != 0) {
1303	if (len + 3 >= max) {
1304	temp = xmlSaveUriRealloc(ret, &max);
1305	if (temp == NULL) goto mem_error;
1306	ret = temp;
1307	}
1308	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1309	ret[len++] = *p++;
1310	else {
1311	int val = (unsigned char )p++;
1312	int hi = val / 0x10, lo = val % 0x10;
1313	ret[len++] = '%';
1314	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1315	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1316	}
1317	}
1318	}
1319	if (len >= max) {
1320	temp = xmlSaveUriRealloc(ret, &max);
1321	if (temp == NULL) goto mem_error;
1322	ret = temp;
1323	}
1324	ret[len] = 0;
1325	return(ret);
1326
1327	mem_error:
1328	xmlFree(ret);
1329	return(NULL);
1330	}
1331
1332	/**
1333	* xmlPrintURI:
1334	* @stream: a FILE* for the output
1335	* @uri: pointer to an xmlURI
1336	*
1337	* Prints the URI in the stream @stream.
1338	*/
1339	void
1340	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1341	xmlChar *out;
1342
1343	out = xmlSaveUri(uri);
1344	if (out != NULL) {
1345	fprintf(stream, "%s", (char *) out);
1346	xmlFree(out);
1347	}
1348	}
1349
1350	/**
1351	* xmlCleanURI:
1352	* @uri: pointer to an xmlURI
1353	*
1354	* Make sure the xmlURI struct is free of content
1355	*/
1356	static void
1357	xmlCleanURI(xmlURIPtr uri) {
1358	if (uri == NULL) return;
1359
1360	if (uri->scheme != NULL) xmlFree(uri->scheme);
1361	uri->scheme = NULL;
1362	if (uri->server != NULL) xmlFree(uri->server);
1363	uri->server = NULL;
1364	if (uri->user != NULL) xmlFree(uri->user);
1365	uri->user = NULL;
1366	if (uri->path != NULL) xmlFree(uri->path);
1367	uri->path = NULL;
1368	if (uri->fragment != NULL) xmlFree(uri->fragment);
1369	uri->fragment = NULL;
1370	if (uri->opaque != NULL) xmlFree(uri->opaque);
1371	uri->opaque = NULL;
1372	if (uri->authority != NULL) xmlFree(uri->authority);
1373	uri->authority = NULL;
1374	if (uri->query != NULL) xmlFree(uri->query);
1375	uri->query = NULL;
1376	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1377	uri->query_raw = NULL;
1378	}
1379
1380	/**
1381	* xmlFreeURI:
1382	* @uri: pointer to an xmlURI
1383	*
1384	* Free up the xmlURI struct
1385	*/
1386	void
1387	xmlFreeURI(xmlURIPtr uri) {
1388	if (uri == NULL) return;
1389
1390	if (uri->scheme != NULL) xmlFree(uri->scheme);
1391	if (uri->server != NULL) xmlFree(uri->server);
1392	if (uri->user != NULL) xmlFree(uri->user);
1393	if (uri->path != NULL) xmlFree(uri->path);
1394	if (uri->fragment != NULL) xmlFree(uri->fragment);
1395	if (uri->opaque != NULL) xmlFree(uri->opaque);
1396	if (uri->authority != NULL) xmlFree(uri->authority);
1397	if (uri->query != NULL) xmlFree(uri->query);
1398	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1399	xmlFree(uri);
1400	}
1401
1402	/************************************************************************
1403	* *
1404	* Helper functions *
1405	* *
1406	************************************************************************/
1407
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input. Runs of consecutive '/'
 * inside the path are collapsed to a single '/' as part of the first pass.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position, "out" the (never larger) write position.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /*
         * Source and destination overlap, so the tail (including its
         * terminator) is moved byte by byte instead of with strcpy.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /*
     * "out" still marks the terminator position written after the
     * (c)/(d) pass.  The removals above only shorten the string, so this
     * store lands on or beyond the current terminator.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1596
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
1604
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the
 * remaining @len bytes is copied through unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-supplied @target needs room for at most
 * @len + 1 bytes (the output is always zero terminated).
 *
 * Returns @target when it is not NULL, otherwise a newly allocated copy of
 * the string, unescaped; returns NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /*
         * Refuse lengths that do not fit an int, or for which the
         * "len + 1" allocation size below would overflow.
         */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while(len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = 0;

            /* high nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = (*in - 'A') + 10;
            /* low nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = c * 16 + (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = c * 16 + (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = c * 16 + (*in - 'A') + 10;
            in++;
            len -= 3;
            /* Explicit narrowing: c is in the range 0..255 */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1667
1668	/**
1669	* xmlURIEscapeStr:
1670	* @str: string to escape
1671	* @list: exception list string of chars not to escape
1672	*
1673	* This routine escapes a string to hex, ignoring reserved characters (a-z)
1674	* and the characters in the exception list.
1675	*
1676	* Returns a new escaped string or NULL in case of error.
1677	*/
1678	xmlChar *
1679	xmlURIEscapeStr(const xmlChar str, const xmlChar list) {
1680	xmlChar *ret, ch;
1681	xmlChar *temp;
1682	const xmlChar *in;
1683	int len, out;
1684
1685	if (str == NULL)
1686	return(NULL);
1687	if (str[0] == 0)
1688	return(xmlStrdup(str));
1689	len = xmlStrlen(str);
1690	if (!(len > 0)) return(NULL);
1691
1692	len += 20;
1693	ret = (xmlChar *) xmlMallocAtomic(len);
1694	if (ret == NULL) {
1695	xmlURIErrMemory("escaping URI value\n");
1696	return(NULL);
1697	}
1698	in = (const xmlChar *) str;
1699	out = 0;
1700	while(*in != 0) {
1701	if (len - out <= 3) {
1702	temp = xmlSaveUriRealloc(ret, &len);
1703	if (temp == NULL) {
1704	xmlURIErrMemory("escaping URI value\n");
1705	xmlFree(ret);
1706	return(NULL);
1707	}
1708	ret = temp;
1709	}
1710
1711	ch = *in;
1712
1713	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1714	unsigned char val;
1715	ret[out++] = '%';
1716	val = ch >> 4;
1717	if (val <= 9)
1718	ret[out++] = '0' + val;
1719	else
1720	ret[out++] = 'A' + val - 0xA;
1721	val = ch & 0xF;
1722	if (val <= 9)
1723	ret[out++] = '0' + val;
1724	else
1725	ret[out++] = 'A' + val - 0xA;
1726	in++;
1727	} else {
1728	ret[out++] = *in++;
1729	}
1730
1731	}
1732	ret[out] = 0;
1733	return(ret);
1734	}
1735
1736	/**
1737	* xmlURIEscape:
1738	* @str: the string of the URI to escape
1739	*
1740	* Escaping routine, does not do validity checks !
1741	* It will try to escape the chars needing this, but this is heuristic
1742	* based it's impossible to be sure.
1743	*
1744	* Returns an copy of the string, but escaped
1745	*
1746	* 25 May 2001
1747	* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1748	* according to RFC2396.
1749	* - Carl Douglas
1750	*/
1751	xmlChar *
1752	xmlURIEscape(const xmlChar * str)
1753	{
1754	xmlChar ret, segment = NULL;
1755	xmlURIPtr uri;
1756	int ret2;
1757
1758	if (str == NULL)
1759	return (NULL);
1760
1761	uri = xmlCreateURI();
1762	if (uri != NULL) {
1763	/*
1764	* Allow escaping errors in the unescaped form
1765	*/
1766	uri->cleanup = 1;
1767	ret2 = xmlParseURIReference(uri, (const char *)str);
1768	if (ret2) {
1769	xmlFreeURI(uri);
1770	return (NULL);
1771	}
1772	}
1773
1774	if (!uri)
1775	return NULL;
1776
1777	ret = NULL;
1778
1779	#define NULLCHK(p) if(!p) { \
1780	xmlURIErrMemory("escaping URI value\n"); \
1781	xmlFreeURI(uri); \
1782	xmlFree(ret); \
1783	return NULL; } \
1784
1785	if (uri->scheme) {
1786	segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1787	NULLCHK(segment)
1788	ret = xmlStrcat(ret, segment);
1789	ret = xmlStrcat(ret, BAD_CAST ":");
1790	xmlFree(segment);
1791	}
1792
1793	if (uri->authority) {
1794	segment =
1795	xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1796	NULLCHK(segment)
1797	ret = xmlStrcat(ret, BAD_CAST "//");
1798	ret = xmlStrcat(ret, segment);
1799	xmlFree(segment);
1800	}
1801
1802	if (uri->user) {
1803	segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1804	NULLCHK(segment)
1805	ret = xmlStrcat(ret,BAD_CAST "//");
1806	ret = xmlStrcat(ret, segment);
1807	ret = xmlStrcat(ret, BAD_CAST "@");
1808	xmlFree(segment);
1809	}
1810
1811	if (uri->server) {
1812	segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1813	NULLCHK(segment)
1814	if (uri->user == NULL)
1815	ret = xmlStrcat(ret, BAD_CAST "//");
1816	ret = xmlStrcat(ret, segment);
1817	xmlFree(segment);
1818	}
1819
1820	if (uri->port) {
1821	xmlChar port[10];
1822
1823	snprintf((char *) port, 10, "%d", uri->port);
1824	ret = xmlStrcat(ret, BAD_CAST ":");
1825	ret = xmlStrcat(ret, port);
1826	}
1827
1828	if (uri->path) {
1829	segment =
1830	xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1831	NULLCHK(segment)
1832	ret = xmlStrcat(ret, segment);
1833	xmlFree(segment);
1834	}
1835
1836	if (uri->query_raw) {
1837	ret = xmlStrcat(ret, BAD_CAST "?");
1838	ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1839	}
1840	else if (uri->query) {
1841	segment =
1842	xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1843	NULLCHK(segment)
1844	ret = xmlStrcat(ret, BAD_CAST "?");
1845	ret = xmlStrcat(ret, segment);
1846	xmlFree(segment);
1847	}
1848
1849	if (uri->opaque) {
1850	segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1851	NULLCHK(segment)
1852	ret = xmlStrcat(ret, segment);
1853	xmlFree(segment);
1854	}
1855
1856	if (uri->fragment) {
1857	segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1858	NULLCHK(segment)
1859	ret = xmlStrcat(ret, BAD_CAST "#");
1860	ret = xmlStrcat(ret, segment);
1861	xmlFree(segment);
1862	}
1863
1864	xmlFreeURI(uri);
1865	#undef NULLCHK
1866
1867	return (ret);
1868	}
1869
1870	/************************************************************************
1871	* *
1872	* Public functions *
1873	* *
1874	************************************************************************/
1875
1876	/**
1877	* xmlBuildURI:
1878	* @URI: the URI instance found in the document
1879	* @base: the base value
1880	*
1881	* Computes he final URI of the reference done by checking that
1882	* the given URI is valid, and building the final URI using the
1883	* base URI. This is processed according to section 5.2 of the
1884	* RFC 2396
1885	*
1886	* 5.2. Resolving Relative References to Absolute Form
1887	*
1888	* Returns a new URI string (to be freed by the caller) or NULL in case
1889	* of error.
1890	*/
1891	xmlChar *
1892	xmlBuildURI(const xmlChar URI, const xmlChar base) {
1893	xmlChar *val = NULL;
1894	int ret, len, indx, cur, out;
1895	xmlURIPtr ref = NULL;
1896	xmlURIPtr bas = NULL;
1897	xmlURIPtr res = NULL;
1898
1899	/*
1900	* 1) The URI reference is parsed into the potential four components and
1901	* fragment identifier, as described in Section 4.3.
1902	*
1903	* NOTE that a completely empty URI is treated by modern browsers
1904	* as a reference to "." rather than as a synonym for the current
1905	* URI. Should we do that here?
1906	*/
1907	if (URI == NULL)
1908	ret = -1;
1909	else {
1910	if (*URI) {
1911	ref = xmlCreateURI();
1912	if (ref == NULL)
1913	goto done;
1914	ret = xmlParseURIReference(ref, (const char *) URI);
1915	}
1916	else
1917	ret = 0;
1918	}
1919	if (ret != 0)
1920	goto done;
1921	if ((ref != NULL) && (ref->scheme != NULL)) {
1922	/*
1923	* The URI is absolute don't modify.
1924	*/
1925	val = xmlStrdup(URI);
1926	goto done;
1927	}
1928	if (base == NULL)
1929	ret = -1;
1930	else {
1931	bas = xmlCreateURI();
1932	if (bas == NULL)
1933	goto done;
1934	ret = xmlParseURIReference(bas, (const char *) base);
1935	}
1936	if (ret != 0) {
1937	if (ref)
1938	val = xmlSaveUri(ref);
1939	goto done;
1940	}
1941	if (ref == NULL) {
1942	/*
1943	* the base fragment must be ignored
1944	*/
1945	if (bas->fragment != NULL) {
1946	xmlFree(bas->fragment);
1947	bas->fragment = NULL;
1948	}
1949	val = xmlSaveUri(bas);
1950	goto done;
1951	}
1952
1953	/*
1954	* 2) If the path component is empty and the scheme, authority, and
1955	* query components are undefined, then it is a reference to the
1956	* current document and we are done. Otherwise, the reference URI's
1957	* query and fragment components are defined as found (or not found)
1958	* within the URI reference and not inherited from the base URI.
1959	*
1960	* NOTE that in modern browsers, the parsing differs from the above
1961	* in the following aspect: the query component is allowed to be
1962	* defined while still treating this as a reference to the current
1963	* document.
1964	*/
1965	res = xmlCreateURI();
1966	if (res == NULL)
1967	goto done;
1968	if ((ref->scheme == NULL) && (ref->path == NULL) &&
1969	((ref->authority == NULL) && (ref->server == NULL))) {
1970	if (bas->scheme != NULL)
1971	res->scheme = xmlMemStrdup(bas->scheme);
1972	if (bas->authority != NULL)
1973	res->authority = xmlMemStrdup(bas->authority);
1974	else if ((bas->server != NULL) \|\| (bas->port == -1)) {
1975	if (bas->server != NULL)
1976	res->server = xmlMemStrdup(bas->server);
1977	if (bas->user != NULL)
1978	res->user = xmlMemStrdup(bas->user);
1979	res->port = bas->port;
1980	}
1981	if (bas->path != NULL)
1982	res->path = xmlMemStrdup(bas->path);
1983	if (ref->query_raw != NULL)
1984	res->query_raw = xmlMemStrdup (ref->query_raw);
1985	else if (ref->query != NULL)
1986	res->query = xmlMemStrdup(ref->query);
1987	else if (bas->query_raw != NULL)
1988	res->query_raw = xmlMemStrdup(bas->query_raw);
1989	else if (bas->query != NULL)
1990	res->query = xmlMemStrdup(bas->query);
1991	if (ref->fragment != NULL)
1992	res->fragment = xmlMemStrdup(ref->fragment);
1993	goto step_7;
1994	}
1995
1996	/*
1997	* 3) If the scheme component is defined, indicating that the reference
1998	* starts with a scheme name, then the reference is interpreted as an
1999	* absolute URI and we are done. Otherwise, the reference URI's
2000	* scheme is inherited from the base URI's scheme component.
2001	*/
2002	if (ref->scheme != NULL) {
2003	val = xmlSaveUri(ref);
2004	goto done;
2005	}
2006	if (bas->scheme != NULL)
2007	res->scheme = xmlMemStrdup(bas->scheme);
2008
2009	if (ref->query_raw != NULL)
2010	res->query_raw = xmlMemStrdup(ref->query_raw);
2011	else if (ref->query != NULL)
2012	res->query = xmlMemStrdup(ref->query);
2013	if (ref->fragment != NULL)
2014	res->fragment = xmlMemStrdup(ref->fragment);
2015
2016	/*
2017	* 4) If the authority component is defined, then the reference is a
2018	* network-path and we skip to step 7. Otherwise, the reference
2019	* URI's authority is inherited from the base URI's authority
2020	* component, which will also be undefined if the URI scheme does not
2021	* use an authority component.
2022	*/
2023	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
2024	if (ref->authority != NULL)
2025	res->authority = xmlMemStrdup(ref->authority);
2026	else {
2027	res->server = xmlMemStrdup(ref->server);
2028	if (ref->user != NULL)
2029	res->user = xmlMemStrdup(ref->user);
2030	res->port = ref->port;
2031	}
2032	if (ref->path != NULL)
2033	res->path = xmlMemStrdup(ref->path);
2034	goto step_7;
2035	}
2036	if (bas->authority != NULL)
2037	res->authority = xmlMemStrdup(bas->authority);
2038	else if ((bas->server != NULL) \|\| (bas->port == -1)) {
2039	if (bas->server != NULL)
2040	res->server = xmlMemStrdup(bas->server);
2041	if (bas->user != NULL)
2042	res->user = xmlMemStrdup(bas->user);
2043	res->port = bas->port;
2044	}
2045
2046	/*
2047	* 5) If the path component begins with a slash character ("/"), then
2048	* the reference is an absolute-path and we skip to step 7.
2049	*/
2050	if ((ref->path != NULL) && (ref->path[0] == '/')) {
2051	res->path = xmlMemStrdup(ref->path);
2052	goto step_7;
2053	}
2054
2055
2056	/*
2057	* 6) If this step is reached, then we are resolving a relative-path
2058	* reference. The relative path needs to be merged with the base
2059	* URI's path. Although there are many ways to do this, we will
2060	* describe a simple method using a separate string buffer.
2061	*
2062	* Allocate a buffer large enough for the result string.
2063	*/
2064	len = 2; /* extra / and 0 */
2065	if (ref->path != NULL)
2066	len += strlen(ref->path);
2067	if (bas->path != NULL)
2068	len += strlen(bas->path);
2069	res->path = (char *) xmlMallocAtomic(len);
2070	if (res->path == NULL) {
2071	xmlURIErrMemory("resolving URI against base\n");
2072	goto done;
2073	}
2074	res->path[0] = 0;
2075
2076	/*
2077	* a) All but the last segment of the base URI's path component is
2078	* copied to the buffer. In other words, any characters after the
2079	* last (right-most) slash character, if any, are excluded.
2080	*/
2081	cur = 0;
2082	out = 0;
2083	if (bas->path != NULL) {
2084	while (bas->path[cur] != 0) {
2085	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2086	cur++;
2087	if (bas->path[cur] == 0)
2088	break;
2089
2090	cur++;
2091	while (out < cur) {
2092	res->path[out] = bas->path[out];
2093	out++;
2094	}
2095	}
2096	}
2097	res->path[out] = 0;
2098
2099	/*
2100	* b) The reference's path component is appended to the buffer
2101	* string.
2102	*/
2103	if (ref->path != NULL && ref->path[0] != 0) {
2104	indx = 0;
2105	/*
2106	* Ensure the path includes a '/'
2107	*/
2108	if ((out == 0) && (bas->server != NULL))
2109	res->path[out++] = '/';
2110	while (ref->path[indx] != 0) {
2111	res->path[out++] = ref->path[indx++];
2112	}
2113	}
2114	res->path[out] = 0;
2115
2116	/*
2117	* Steps c) to h) are really path normalization steps
2118	*/
2119	xmlNormalizeURIPath(res->path);
2120
2121	step_7:
2122
2123	/*
2124	* 7) The resulting URI components, including any inherited from the
2125	* base URI, are recombined to give the absolute form of the URI
2126	* reference.
2127	*/
2128	val = xmlSaveUri(res);
2129
2130	done:
2131	if (ref != NULL)
2132	xmlFreeURI(ref);
2133	if (bas != NULL)
2134	xmlFreeURI(bas);
2135	if (res != NULL)
2136	xmlFreeURI(res);
2137	return(val);
2138	}
2139
2140	/**
2141	* xmlBuildRelativeURI:
2142	* @URI: the URI reference under consideration
2143	* @base: the base value
2144	*
2145	* Expresses the URI of the reference in terms relative to the
2146	* base. Some examples of this operation include:
2147	* base = "http://site1.com/docs/book1.html"
2148	* URI input URI returned
2149	* docs/pic1.gif pic1.gif
2150	* docs/img/pic1.gif img/pic1.gif
2151	* img/pic1.gif ../img/pic1.gif
2152	* http://site1.com/docs/pic1.gif pic1.gif
2153	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2154	*
2155	* base = "docs/book1.html"
2156	* URI input URI returned
2157	* docs/pic1.gif pic1.gif
2158	* docs/img/pic1.gif img/pic1.gif
2159	* img/pic1.gif ../img/pic1.gif
2160	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2161	*
2162	*
2163	* Note: if the URI reference is really weird or complicated, it may be
2164	* worthwhile to first convert it into a "nice" one by calling
2165	* xmlBuildURI (using 'base') before calling this routine,
2166	* since this routine (for reasonable efficiency) assumes URI has
2167	* already been through some validation.
2168	*
2169	* Returns a new URI string (to be freed by the caller) or NULL in case
2170	* error.
2171	*/
2172	xmlChar *
2173	xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2174	{
2175	xmlChar *val = NULL;
2176	int ret;
2177	int ix;
2178	int nbslash = 0;
2179	int len;
2180	xmlURIPtr ref = NULL;
2181	xmlURIPtr bas = NULL;
2182	xmlChar bptr, uptr, *vptr;
2183	int remove_path = 0;
2184
2185	if ((URI == NULL) \|\| (*URI == 0))
2186	return NULL;
2187
2188	/*
2189	* First parse URI into a standard form
2190	*/
2191	ref = xmlCreateURI ();
2192	if (ref == NULL)
2193	return NULL;
2194	/* If URI not already in "relative" form */
2195	if (URI[0] != '.') {
2196	ret = xmlParseURIReference (ref, (const char *) URI);
2197	if (ret != 0)
2198	goto done; /* Error in URI, return NULL */
2199	} else
2200	ref->path = (char *)xmlStrdup(URI);
2201
2202	/*
2203	* Next parse base into the same standard form
2204	*/
2205	if ((base == NULL) \|\| (*base == 0)) {
2206	val = xmlStrdup (URI);
2207	goto done;
2208	}
2209	bas = xmlCreateURI ();
2210	if (bas == NULL)
2211	goto done;
2212	if (base[0] != '.') {
2213	ret = xmlParseURIReference (bas, (const char *) base);
2214	if (ret != 0)
2215	goto done; /* Error in base, return NULL */
2216	} else
2217	bas->path = (char *)xmlStrdup(base);
2218
2219	/*
2220	* If the scheme / server on the URI differs from the base,
2221	* just return the URI
2222	*/
2223	if ((ref->scheme != NULL) &&
2224	((bas->scheme == NULL) \|\|
2225	(xmlStrcmp ((xmlChar )bas->scheme, (xmlChar )ref->scheme)) \|\|
2226	(xmlStrcmp ((xmlChar )bas->server, (xmlChar )ref->server)))) {
2227	val = xmlStrdup (URI);
2228	goto done;
2229	}
2230	if (xmlStrEqual((xmlChar )bas->path, (xmlChar )ref->path)) {
2231	val = xmlStrdup(BAD_CAST "");
2232	goto done;
2233	}
2234	if (bas->path == NULL) {
2235	val = xmlStrdup((xmlChar *)ref->path);
2236	goto done;
2237	}
2238	if (ref->path == NULL) {
2239	ref->path = (char *) "/";
2240	remove_path = 1;
2241	}
2242
2243	/*
2244	* At this point (at last!) we can compare the two paths
2245	*
2246	* First we take care of the special case where either of the
2247	* two path components may be missing (bug 316224)
2248	*/
2249	bptr = (xmlChar *)bas->path;
2250	{
2251	xmlChar rptr = (xmlChar ) ref->path;
2252	int pos = 0;
2253
2254	/*
2255	* Next we compare the two strings and find where they first differ
2256	*/
2257	if ((*rptr == '.') && (rptr[1] == '/'))
2258	rptr += 2;
2259	if ((*bptr == '.') && (bptr[1] == '/'))
2260	bptr += 2;
2261	else if ((bptr == '/') && (rptr != '/'))
2262	bptr++;
2263	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2264	pos++;
2265
2266	if (bptr[pos] == rptr[pos]) {
2267	val = xmlStrdup(BAD_CAST "");
2268	goto done; /* (I can't imagine why anyone would do this) */
2269	}
2270
2271	/*
2272	* In URI, "back up" to the last '/' encountered. This will be the
2273	* beginning of the "unique" suffix of URI
2274	*/
2275	ix = pos;
2276	for (; ix > 0; ix--) {
2277	if (rptr[ix - 1] == '/')
2278	break;
2279	}
2280	uptr = (xmlChar *)&rptr[ix];
2281
2282	/*
2283	* In base, count the number of '/' from the differing point
2284	*/
2285	for (; bptr[ix] != 0; ix++) {
2286	if (bptr[ix] == '/')
2287	nbslash++;
2288	}
2289
2290	/*
2291	* e.g: URI="foo/" base="foo/bar" -> "./"
2292	*/
2293	if (nbslash == 0 && !uptr[0]) {
2294	val = xmlStrdup(BAD_CAST "./");
2295	goto done;
2296	}
2297
2298	len = xmlStrlen (uptr) + 1;
2299	}
2300
2301	if (nbslash == 0) {
2302	if (uptr != NULL)
2303	/* exception characters from xmlSaveUri */
2304	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2305	goto done;
2306	}
2307
2308	/*
2309	* Allocate just enough space for the returned string -
2310	* length of the remainder of the URI, plus enough space
2311	* for the "../" groups, plus one for the terminator
2312	*/
2313	val = (xmlChar ) xmlMalloc (len + 3 nbslash);
2314	if (val == NULL) {
2315	xmlURIErrMemory("building relative URI\n");
2316	goto done;
2317	}
2318	vptr = val;
2319	/*
2320	* Put in as many "../" as needed
2321	*/
2322	for (; nbslash>0; nbslash--) {
2323	*vptr++ = '.';
2324	*vptr++ = '.';
2325	*vptr++ = '/';
2326	}
2327	/*
2328	* Finish up with the end of the URI
2329	*/
2330	if (uptr != NULL) {
2331	if ((vptr > val) && (len > 0) &&
2332	(uptr[0] == '/') && (vptr[-1] == '/')) {
2333	memcpy (vptr, uptr + 1, len - 1);
2334	vptr[len - 2] = 0;
2335	} else {
2336	memcpy (vptr, uptr, len);
2337	vptr[len - 1] = 0;
2338	}
2339	} else {
2340	vptr[len - 1] = 0;
2341	}
2342
2343	/* escape the freshly-built path */
2344	vptr = val;
2345	/* exception characters from xmlSaveUri */
2346	val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2347	xmlFree(vptr);
2348
2349	done:
2350	/*
2351	* Free the working variables
2352	*/
2353	if (remove_path != 0)
2354	ref->path = NULL;
2355	if (ref != NULL)
2356	xmlFreeURI (ref);
2357	if (bas != NULL)
2358	xmlFreeURI (bas);
2359
2360	return val;
2361	}
2362
2363	/**
2364	* xmlCanonicPath:
2365	* @path: the resource locator in a filesystem notation
2366	*
2367	* Constructs a canonic path from the specified path.
2368	*
2369	* Returns a new canonic path, or a duplicate of the path parameter if the
2370	* construction fails. The caller is responsible for freeing the memory occupied
2371	* by the returned string. If there is insufficient memory available, or the
2372	* argument is NULL, the function returns NULL.
2373	*/
2374	#define IS_WINDOWS_PATH(p) \
2375	((p != NULL) && \
2376	(((p[0] >= 'a') && (p[0] <= 'z')) \|\| \
2377	((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2378	(p[1] == ':') && ((p[2] == '/') \|\| (p[2] == '\\')))
2379	xmlChar *
2380	xmlCanonicPath(const xmlChar *path)
2381	{
2382	/*
2383	* For Windows implementations, additional work needs to be done to
2384	* replace backslashes in pathnames with "forward slashes"
2385	*/
2386	#if defined(_WIN32) && !defined(__CYGWIN__)
2387	int len = 0;
2388	char *p = NULL;
2389	#endif
2390	xmlURIPtr uri;
2391	xmlChar *ret;
2392	const xmlChar *absuri;
2393
2394	if (path == NULL)
2395	return(NULL);
2396
2397	#if defined(_WIN32)
2398	/*
2399	* We must not change the backslashes to slashes if the the path
2400	* starts with \\?\
2401	* Those paths can be up to 32k characters long.
2402	* Was added specifically for OpenOffice, those paths can't be converted
2403	* to URIs anyway.
2404	*/
2405	if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2406	(path[3] == '\\') )
2407	return xmlStrdup((const xmlChar *) path);
2408	#endif
2409
2410	/* sanitize filename starting with // so it can be used as URI */
2411	if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2412	path++;
2413
2414	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2415	xmlFreeURI(uri);
2416	return xmlStrdup(path);
2417	}
2418
2419	/* Check if this is an "absolute uri" */
2420	absuri = xmlStrstr(path, BAD_CAST "://");
2421	if (absuri != NULL) {
2422	int l, j;
2423	unsigned char c;
2424	xmlChar *escURI;
2425
2426	/*
2427	* this looks like an URI where some parts have not been
2428	* escaped leading to a parsing problem. Check that the first
2429	* part matches a protocol.
2430	*/
2431	l = absuri - path;
2432	/* Bypass if first part (part before the '://') is > 20 chars */
2433	if ((l <= 0) \|\| (l > 20))
2434	goto path_processing;
2435	/* Bypass if any non-alpha characters are present in first part */
2436	for (j = 0;j < l;j++) {
2437	c = path[j];
2438	if (!(((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z'))))
2439	goto path_processing;
2440	}
2441
2442	/* Escape all except the characters specified in the supplied path */
2443	escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2444	if (escURI != NULL) {
2445	/* Try parsing the escaped path */
2446	uri = xmlParseURI((const char *) escURI);
2447	/* If successful, return the escaped string */
2448	if (uri != NULL) {
2449	xmlFreeURI(uri);
2450	return escURI;
2451	}
2452	xmlFree(escURI);
2453	}
2454	}
2455
2456	path_processing:
2457	/* For Windows implementations, replace backslashes with 'forward slashes' */
2458	#if defined(_WIN32) && !defined(__CYGWIN__)
2459	/*
2460	* Create a URI structure
2461	*/
2462	uri = xmlCreateURI();
2463	if (uri == NULL) { /* Guard against 'out of memory' */
2464	return(NULL);
2465	}
2466
2467	len = xmlStrlen(path);
2468	if ((len > 2) && IS_WINDOWS_PATH(path)) {
2469	/* make the scheme 'file' */
2470	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2471	/* allocate space for leading '/' + path + string terminator */
2472	uri->path = xmlMallocAtomic(len + 2);
2473	if (uri->path == NULL) {
2474	xmlFreeURI(uri); /* Guard against 'out of memory' */
2475	return(NULL);
2476	}
2477	/* Put in leading '/' plus path */
2478	uri->path[0] = '/';
2479	p = uri->path + 1;
2480	strncpy(p, (char *) path, len + 1);
2481	} else {
2482	uri->path = (char *) xmlStrdup(path);
2483	if (uri->path == NULL) {
2484	xmlFreeURI(uri);
2485	return(NULL);
2486	}
2487	p = uri->path;
2488	}
2489	/* Now change all occurrences of '\' to '/' */
2490	while (*p != '\0') {
2491	if (*p == '\\')
2492	*p = '/';
2493	p++;
2494	}
2495
2496	if (uri->scheme == NULL) {
2497	ret = xmlStrdup((const xmlChar *) uri->path);
2498	} else {
2499	ret = xmlSaveUri(uri);
2500	}
2501
2502	xmlFreeURI(uri);
2503	#else
2504	ret = xmlStrdup((const xmlChar *) path);
2505	#endif
2506	return(ret);
2507	}
2508
2509	/**
2510	* xmlPathToURI:
2511	* @path: the resource locator in a filesystem notation
2512	*
2513	* Constructs an URI expressing the existing path
2514	*
2515	* Returns a new URI, or a duplicate of the path parameter if the
2516	* construction fails. The caller is responsible for freeing the memory
2517	* occupied by the returned string. If there is insufficient memory available,
2518	* or the argument is NULL, the function returns NULL.
2519	*/
2520	xmlChar *
2521	xmlPathToURI(const xmlChar *path)
2522	{
2523	xmlURIPtr uri;
2524	xmlURI temp;
2525	xmlChar ret, cal;
2526
2527	if (path == NULL)
2528	return(NULL);
2529
2530	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2531	xmlFreeURI(uri);
2532	return xmlStrdup(path);
2533	}
2534	cal = xmlCanonicPath(path);
2535	if (cal == NULL)
2536	return(NULL);
2537	#if defined(_WIN32) && !defined(__CYGWIN__)
2538	/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2539	If 'cal' is a valid URI already then we are done here, as continuing would make
2540	it invalid. */
2541	if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2542	xmlFreeURI(uri);
2543	return cal;
2544	}
2545	/* 'cal' can contain a relative path with backslashes. If that is processed
2546	by xmlSaveURI, they will be escaped and the external entity loader machinery
2547	will fail. So convert them to slashes. Misuse 'ret' for walking. */
2548	ret = cal;
2549	while (*ret != '\0') {
2550	if (*ret == '\\')
2551	*ret = '/';
2552	ret++;
2553	}
2554	#endif
2555	memset(&temp, 0, sizeof(temp));
2556	temp.path = (char *) cal;
2557	ret = xmlSaveUri(&temp);
2558	xmlFree(cal);
2559	return(ret);
2560	}
2561	#define bottom_uri
2562	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.9.14/uri.c@ 102654

Download in other formats: