uri.c@ 35199

Last change on this file since 35199 was 6076, checked in by vboxsync, 17 years ago
Merged dmik/s2 branch (r25959:26751) to the trunk.
Property svn:eol-style set to `native` Property svn:keywords set to `Date Revision Author Id`
File size: 65.2 KB

Line
1	/**
2	* uri.c: set of generic URI related routines
3	*
4	* Reference: RFCs 2396, 2732 and 2373
5	*
6	* See Copyright for the status of this software.
7	*
8	* daniel@veillard.com
9	*/
10
11	#define IN_LIBXML
12	#include "libxml.h"
13
14	#include <string.h>
15
16	#include <libxml/xmlmemory.h>
17	#include <libxml/uri.h>
18	#include <libxml/globals.h>
19	#include <libxml/xmlerror.h>
20
21	/************************************************************************
22	* *
23	* Macros to differentiate various character type *
24	* directly extracted from RFC 2396 *
25	* *
26	************************************************************************/
27
/*
 * Character-class helpers taken from the RFC 2396 grammar.
 *
 * Macros taking `x` test a single character value; macros taking `p`
 * take a pointer to the current position in a string, because they may
 * need to look ahead (for "%" hex hex escapes).  All of them evaluate
 * their argument more than once, so never pass an expression with side
 * effects.
 */

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */

#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))


/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain stops at the first mismatch, so a terminating NUL is
 * never read past.
 */

#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	        ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */

#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
          ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */

#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */

#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */

#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This macro additionally accepts "[" and "]".
 */

#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||		\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: a "%" advances
 * by three bytes, anything else by one.  Only use it on a position that
 * was just accepted by one of the predicates above, so that a "%" is
 * known to be followed by two hex digits.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s as a plain char string. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
189
190	/************************************************************************
191	* *
192	* Generic URI structure functions *
193	* *
194	************************************************************************/
195
196	/**
197	* xmlCreateURI:
198	*
199	* Simply creates an empty xmlURI
200	*
201	* Returns the new structure or NULL in case of error
202	*/
203	xmlURIPtr
204	xmlCreateURI(void) {
205	xmlURIPtr ret;
206
207	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208	if (ret == NULL) {
209	xmlGenericError(xmlGenericErrorContext,
210	"xmlCreateURI: out of memory\n");
211	return(NULL);
212	}
213	memset(ret, 0, sizeof(xmlURI));
214	return(ret);
215	}
216
217	/**
218	* xmlSaveUri:
219	* @uri: pointer to an xmlURI
220	*
221	* Save the URI as an escaped string
222	*
223	* Returns a new string (to be deallocated by caller)
224	*/
225	xmlChar *
226	xmlSaveUri(xmlURIPtr uri) {
227	xmlChar *ret = NULL;
228	const char *p;
229	int len;
230	int max;
231
232	if (uri == NULL) return(NULL);
233
234
235	max = 80;
236	ret = (xmlChar ) xmlMallocAtomic((max + 1) sizeof(xmlChar));
237	if (ret == NULL) {
238	xmlGenericError(xmlGenericErrorContext,
239	"xmlSaveUri: out of memory\n");
240	return(NULL);
241	}
242	len = 0;
243
244	if (uri->scheme != NULL) {
245	p = uri->scheme;
246	while (*p != 0) {
247	if (len >= max) {
248	max *= 2;
249	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
250	if (ret == NULL) {
251	xmlGenericError(xmlGenericErrorContext,
252	"xmlSaveUri: out of memory\n");
253	return(NULL);
254	}
255	}
256	ret[len++] = *p++;
257	}
258	if (len >= max) {
259	max *= 2;
260	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
261	if (ret == NULL) {
262	xmlGenericError(xmlGenericErrorContext,
263	"xmlSaveUri: out of memory\n");
264	return(NULL);
265	}
266	}
267	ret[len++] = ':';
268	}
269	if (uri->opaque != NULL) {
270	p = uri->opaque;
271	while (*p != 0) {
272	if (len + 3 >= max) {
273	max *= 2;
274	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
275	if (ret == NULL) {
276	xmlGenericError(xmlGenericErrorContext,
277	"xmlSaveUri: out of memory\n");
278	return(NULL);
279	}
280	}
281	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
282	ret[len++] = *p++;
283	else {
284	int val = (unsigned char )p++;
285	int hi = val / 0x10, lo = val % 0x10;
286	ret[len++] = '%';
287	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289	}
290	}
291	} else {
292	if (uri->server != NULL) {
293	if (len + 3 >= max) {
294	max *= 2;
295	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
296	if (ret == NULL) {
297	xmlGenericError(xmlGenericErrorContext,
298	"xmlSaveUri: out of memory\n");
299	return(NULL);
300	}
301	}
302	ret[len++] = '/';
303	ret[len++] = '/';
304	if (uri->user != NULL) {
305	p = uri->user;
306	while (*p != 0) {
307	if (len + 3 >= max) {
308	max *= 2;
309	ret = (xmlChar *) xmlRealloc(ret,
310	(max + 1) * sizeof(xmlChar));
311	if (ret == NULL) {
312	xmlGenericError(xmlGenericErrorContext,
313	"xmlSaveUri: out of memory\n");
314	return(NULL);
315	}
316	}
317	if ((IS_UNRESERVED(*(p))) \|\|
318	(((p) == ';')) \|\| (((p) == ':')) \|\|
319	(((p) == '&')) \|\| (((p) == '=')) \|\|
320	(((p) == '+')) \|\| (((p) == '$')) \|\|
321	((*(p) == ',')))
322	ret[len++] = *p++;
323	else {
324	int val = (unsigned char )p++;
325	int hi = val / 0x10, lo = val % 0x10;
326	ret[len++] = '%';
327	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329	}
330	}
331	if (len + 3 >= max) {
332	max *= 2;
333	ret = (xmlChar *) xmlRealloc(ret,
334	(max + 1) * sizeof(xmlChar));
335	if (ret == NULL) {
336	xmlGenericError(xmlGenericErrorContext,
337	"xmlSaveUri: out of memory\n");
338	return(NULL);
339	}
340	}
341	ret[len++] = '@';
342	}
343	p = uri->server;
344	while (*p != 0) {
345	if (len >= max) {
346	max *= 2;
347	ret = (xmlChar *) xmlRealloc(ret,
348	(max + 1) * sizeof(xmlChar));
349	if (ret == NULL) {
350	xmlGenericError(xmlGenericErrorContext,
351	"xmlSaveUri: out of memory\n");
352	return(NULL);
353	}
354	}
355	ret[len++] = *p++;
356	}
357	if (uri->port > 0) {
358	if (len + 10 >= max) {
359	max *= 2;
360	ret = (xmlChar *) xmlRealloc(ret,
361	(max + 1) * sizeof(xmlChar));
362	if (ret == NULL) {
363	xmlGenericError(xmlGenericErrorContext,
364	"xmlSaveUri: out of memory\n");
365	return(NULL);
366	}
367	}
368	len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
369	}
370	} else if (uri->authority != NULL) {
371	if (len + 3 >= max) {
372	max *= 2;
373	ret = (xmlChar *) xmlRealloc(ret,
374	(max + 1) * sizeof(xmlChar));
375	if (ret == NULL) {
376	xmlGenericError(xmlGenericErrorContext,
377	"xmlSaveUri: out of memory\n");
378	return(NULL);
379	}
380	}
381	ret[len++] = '/';
382	ret[len++] = '/';
383	p = uri->authority;
384	while (*p != 0) {
385	if (len + 3 >= max) {
386	max *= 2;
387	ret = (xmlChar *) xmlRealloc(ret,
388	(max + 1) * sizeof(xmlChar));
389	if (ret == NULL) {
390	xmlGenericError(xmlGenericErrorContext,
391	"xmlSaveUri: out of memory\n");
392	return(NULL);
393	}
394	}
395	if ((IS_UNRESERVED(*(p))) \|\|
396	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
397	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
398	(((p) == '=')) \|\| (((p) == '+')))
399	ret[len++] = *p++;
400	else {
401	int val = (unsigned char )p++;
402	int hi = val / 0x10, lo = val % 0x10;
403	ret[len++] = '%';
404	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406	}
407	}
408	} else if (uri->scheme != NULL) {
409	if (len + 3 >= max) {
410	max *= 2;
411	ret = (xmlChar *) xmlRealloc(ret,
412	(max + 1) * sizeof(xmlChar));
413	if (ret == NULL) {
414	xmlGenericError(xmlGenericErrorContext,
415	"xmlSaveUri: out of memory\n");
416	return(NULL);
417	}
418	}
419	ret[len++] = '/';
420	ret[len++] = '/';
421	}
422	if (uri->path != NULL) {
423	p = uri->path;
424	while (*p != 0) {
425	if (len + 3 >= max) {
426	max *= 2;
427	ret = (xmlChar *) xmlRealloc(ret,
428	(max + 1) * sizeof(xmlChar));
429	if (ret == NULL) {
430	xmlGenericError(xmlGenericErrorContext,
431	"xmlSaveUri: out of memory\n");
432	return(NULL);
433	}
434	}
435	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
436	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
437	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
438	((*(p) == ',')))
439	ret[len++] = *p++;
440	else {
441	int val = (unsigned char )p++;
442	int hi = val / 0x10, lo = val % 0x10;
443	ret[len++] = '%';
444	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446	}
447	}
448	}
449	if (uri->query_raw != NULL) {
450	if (len + 1 >= max) {
451	max *= 2;
452	ret = (xmlChar *) xmlRealloc(ret,
453	(max + 1) * sizeof(xmlChar));
454	if (ret == NULL) {
455	xmlGenericError(xmlGenericErrorContext,
456	"xmlSaveUri: out of memory\n");
457	return(NULL);
458	}
459	}
460	ret[len++] = '?';
461	p = uri->query_raw;
462	while (*p != 0) {
463	if (len + 1 >= max) {
464	max *= 2;
465	ret = (xmlChar *) xmlRealloc(ret,
466	(max + 1) * sizeof(xmlChar));
467	if (ret == NULL) {
468	xmlGenericError(xmlGenericErrorContext,
469	"xmlSaveUri: out of memory\n");
470	return(NULL);
471	}
472	}
473	ret[len++] = *p++;
474	}
475	} else if (uri->query != NULL) {
476	if (len + 3 >= max) {
477	max *= 2;
478	ret = (xmlChar *) xmlRealloc(ret,
479	(max + 1) * sizeof(xmlChar));
480	if (ret == NULL) {
481	xmlGenericError(xmlGenericErrorContext,
482	"xmlSaveUri: out of memory\n");
483	return(NULL);
484	}
485	}
486	ret[len++] = '?';
487	p = uri->query;
488	while (*p != 0) {
489	if (len + 3 >= max) {
490	max *= 2;
491	ret = (xmlChar *) xmlRealloc(ret,
492	(max + 1) * sizeof(xmlChar));
493	if (ret == NULL) {
494	xmlGenericError(xmlGenericErrorContext,
495	"xmlSaveUri: out of memory\n");
496	return(NULL);
497	}
498	}
499	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
500	ret[len++] = *p++;
501	else {
502	int val = (unsigned char )p++;
503	int hi = val / 0x10, lo = val % 0x10;
504	ret[len++] = '%';
505	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
506	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
507	}
508	}
509	}
510	}
511	if (uri->fragment != NULL) {
512	if (len + 3 >= max) {
513	max *= 2;
514	ret = (xmlChar *) xmlRealloc(ret,
515	(max + 1) * sizeof(xmlChar));
516	if (ret == NULL) {
517	xmlGenericError(xmlGenericErrorContext,
518	"xmlSaveUri: out of memory\n");
519	return(NULL);
520	}
521	}
522	ret[len++] = '#';
523	p = uri->fragment;
524	while (*p != 0) {
525	if (len + 3 >= max) {
526	max *= 2;
527	ret = (xmlChar *) xmlRealloc(ret,
528	(max + 1) * sizeof(xmlChar));
529	if (ret == NULL) {
530	xmlGenericError(xmlGenericErrorContext,
531	"xmlSaveUri: out of memory\n");
532	return(NULL);
533	}
534	}
535	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
536	ret[len++] = *p++;
537	else {
538	int val = (unsigned char )p++;
539	int hi = val / 0x10, lo = val % 0x10;
540	ret[len++] = '%';
541	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
542	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
543	}
544	}
545	}
546	if (len >= max) {
547	max *= 2;
548	ret = (xmlChar ) xmlRealloc(ret, (max + 1) sizeof(xmlChar));
549	if (ret == NULL) {
550	xmlGenericError(xmlGenericErrorContext,
551	"xmlSaveUri: out of memory\n");
552	return(NULL);
553	}
554	}
555	ret[len++] = 0;
556	return(ret);
557	}
558
559	/**
560	* xmlPrintURI:
561	* @stream: a FILE* for the output
562	* @uri: pointer to an xmlURI
563	*
564	* Prints the URI in the stream @stream.
565	*/
566	void
567	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
568	xmlChar *out;
569
570	out = xmlSaveUri(uri);
571	if (out != NULL) {
572	fprintf(stream, "%s", (char *) out);
573	xmlFree(out);
574	}
575	}
576
577	/**
578	* xmlCleanURI:
579	* @uri: pointer to an xmlURI
580	*
581	* Make sure the xmlURI struct is free of content
582	*/
583	static void
584	xmlCleanURI(xmlURIPtr uri) {
585	if (uri == NULL) return;
586
587	if (uri->scheme != NULL) xmlFree(uri->scheme);
588	uri->scheme = NULL;
589	if (uri->server != NULL) xmlFree(uri->server);
590	uri->server = NULL;
591	if (uri->user != NULL) xmlFree(uri->user);
592	uri->user = NULL;
593	if (uri->path != NULL) xmlFree(uri->path);
594	uri->path = NULL;
595	if (uri->fragment != NULL) xmlFree(uri->fragment);
596	uri->fragment = NULL;
597	if (uri->opaque != NULL) xmlFree(uri->opaque);
598	uri->opaque = NULL;
599	if (uri->authority != NULL) xmlFree(uri->authority);
600	uri->authority = NULL;
601	if (uri->query != NULL) xmlFree(uri->query);
602	uri->query = NULL;
603	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
604	uri->query_raw = NULL;
605	}
606
607	/**
608	* xmlFreeURI:
609	* @uri: pointer to an xmlURI
610	*
611	* Free up the xmlURI struct
612	*/
613	void
614	xmlFreeURI(xmlURIPtr uri) {
615	if (uri == NULL) return;
616
617	if (uri->scheme != NULL) xmlFree(uri->scheme);
618	if (uri->server != NULL) xmlFree(uri->server);
619	if (uri->user != NULL) xmlFree(uri->user);
620	if (uri->path != NULL) xmlFree(uri->path);
621	if (uri->fragment != NULL) xmlFree(uri->fragment);
622	if (uri->opaque != NULL) xmlFree(uri->opaque);
623	if (uri->authority != NULL) xmlFree(uri->authority);
624	if (uri->query != NULL) xmlFree(uri->query);
625	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
626	xmlFree(uri);
627	}
628
629	/************************************************************************
630	* *
631	* Helper functions *
632	* *
633	************************************************************************/
634
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" between segments are
 * collapsed to a single "/" as well.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize // : skip all but the last "/" of a run */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* Source and destination overlap, so strcpy() must not be used;
         * memmove() is defined for overlapping regions.  The "+ 1" moves
         * the terminator along with the tail.
         */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if (segp == path)
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }
    /* "out" still marks the end of the string as it was after steps (c)
     * and (d); the string can only have shrunk since, so this write lands
     * at or beyond the current terminator and does not alter the result.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
	cur += 3;

      if (cur != path) {
	out = path;
	while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
	out[0] = 0;
      }
    }

    return(0);
}
822
/* Returns 1 when c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
	return(1);
    if ((c >= 'a') && (c <= 'f'))
	return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
830
831	/**
832	* xmlURIUnescapeString:
833	* @str: the string to unescape
834	* @len: the length in bytes to unescape (or <= 0 to indicate full string)
835	* @target: optional destination buffer
836	*
837	* Unescaping routine, but does not check that the string is an URI. The
838	* output is a direct unsigned char translation of %XX values (no encoding)
839	* Note that the length of the result can only be smaller or same size as
840	* the input string.
841	*
842	* Returns a copy of the string, but unescaped, will return NULL only in case
843	* of error
844	*/
845	char *
846	xmlURIUnescapeString(const char str, int len, char target) {
847	char ret, out;
848	const char *in;
849
850	if (str == NULL)
851	return(NULL);
852	if (len <= 0) len = strlen(str);
853	if (len < 0) return(NULL);
854
855	if (target == NULL) {
856	ret = (char *) xmlMallocAtomic(len + 1);
857	if (ret == NULL) {
858	xmlGenericError(xmlGenericErrorContext,
859	"xmlURIUnescapeString: out of memory\n");
860	return(NULL);
861	}
862	} else
863	ret = target;
864	in = str;
865	out = ret;
866	while(len > 0) {
867	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
868	in++;
869	if ((in >= '0') && (in <= '9'))
870	out = (in - '0');
871	else if ((in >= 'a') && (in <= 'f'))
872	out = (in - 'a') + 10;
873	else if ((in >= 'A') && (in <= 'F'))
874	out = (in - 'A') + 10;
875	in++;
876	if ((in >= '0') && (in <= '9'))
877	out = out * 16 + (*in - '0');
878	else if ((in >= 'a') && (in <= 'f'))
879	out = out * 16 + (*in - 'a') + 10;
880	else if ((in >= 'A') && (in <= 'F'))
881	out = out * 16 + (*in - 'A') + 10;
882	in++;
883	len -= 3;
884	out++;
885	} else {
886	out++ = in++;
887	len--;
888	}
889	}
890	*out = 0;
891	return(ret);
892	}
893
894	/**
895	* xmlURIEscapeStr:
896	* @str: string to escape
897	* @list: exception list string of chars not to escape
898	*
899	* This routine escapes a string to hex, ignoring reserved characters (a-z)
900	* and the characters in the exception list.
901	*
902	* Returns a new escaped string or NULL in case of error.
903	*/
904	xmlChar *
905	xmlURIEscapeStr(const xmlChar str, const xmlChar list) {
906	xmlChar *ret, ch;
907	const xmlChar *in;
908
909	unsigned int len, out;
910
911	if (str == NULL)
912	return(NULL);
913	if (str[0] == 0)
914	return(xmlStrdup(str));
915	len = xmlStrlen(str);
916	if (!(len > 0)) return(NULL);
917
918	len += 20;
919	ret = (xmlChar *) xmlMallocAtomic(len);
920	if (ret == NULL) {
921	xmlGenericError(xmlGenericErrorContext,
922	"xmlURIEscapeStr: out of memory\n");
923	return(NULL);
924	}
925	in = (const xmlChar *) str;
926	out = 0;
927	while(*in != 0) {
928	if (len - out <= 3) {
929	len += 20;
930	ret = (xmlChar *) xmlRealloc(ret, len);
931	if (ret == NULL) {
932	xmlGenericError(xmlGenericErrorContext,
933	"xmlURIEscapeStr: out of memory\n");
934	return(NULL);
935	}
936	}
937
938	ch = *in;
939
940	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
941	unsigned char val;
942	ret[out++] = '%';
943	val = ch >> 4;
944	if (val <= 9)
945	ret[out++] = '0' + val;
946	else
947	ret[out++] = 'A' + val - 0xA;
948	val = ch & 0xF;
949	if (val <= 9)
950	ret[out++] = '0' + val;
951	else
952	ret[out++] = 'A' + val - 0xA;
953	in++;
954	} else {
955	ret[out++] = *in++;
956	}
957
958	}
959	ret[out] = 0;
960	return(ret);
961	}
962
963	/**
964	* xmlURIEscape:
965	* @str: the string of the URI to escape
966	*
967	* Escaping routine, does not do validity checks !
968	* It will try to escape the chars needing this, but this is heuristic
969	* based it's impossible to be sure.
970	*
971	* Returns an copy of the string, but escaped
972	*
973	* 25 May 2001
974	* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
975	* according to RFC2396.
976	* - Carl Douglas
977	*/
978	xmlChar *
979	xmlURIEscape(const xmlChar * str)
980	{
981	xmlChar ret, segment = NULL;
982	xmlURIPtr uri;
983	int ret2;
984
985	#define NULLCHK(p) if(!p) { \
986	xmlGenericError(xmlGenericErrorContext, \
987	"xmlURIEscape: out of memory\n"); \
988	return NULL; }
989
990	if (str == NULL)
991	return (NULL);
992
993	uri = xmlCreateURI();
994	if (uri != NULL) {
995	/*
996	* Allow escaping errors in the unescaped form
997	*/
998	uri->cleanup = 1;
999	ret2 = xmlParseURIReference(uri, (const char *)str);
1000	if (ret2) {
1001	xmlFreeURI(uri);
1002	return (NULL);
1003	}
1004	}
1005
1006	if (!uri)
1007	return NULL;
1008
1009	ret = NULL;
1010
1011	if (uri->scheme) {
1012	segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1013	NULLCHK(segment)
1014	ret = xmlStrcat(ret, segment);
1015	ret = xmlStrcat(ret, BAD_CAST ":");
1016	xmlFree(segment);
1017	}
1018
1019	if (uri->authority) {
1020	segment =
1021	xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1022	NULLCHK(segment)
1023	ret = xmlStrcat(ret, BAD_CAST "//");
1024	ret = xmlStrcat(ret, segment);
1025	xmlFree(segment);
1026	}
1027
1028	if (uri->user) {
1029	segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1030	NULLCHK(segment)
1031	ret = xmlStrcat(ret,BAD_CAST "//");
1032	ret = xmlStrcat(ret, segment);
1033	ret = xmlStrcat(ret, BAD_CAST "@");
1034	xmlFree(segment);
1035	}
1036
1037	if (uri->server) {
1038	segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1039	NULLCHK(segment)
1040	if (uri->user == NULL)
1041	ret = xmlStrcat(ret, BAD_CAST "//");
1042	ret = xmlStrcat(ret, segment);
1043	xmlFree(segment);
1044	}
1045
1046	if (uri->port) {
1047	xmlChar port[10];
1048
1049	snprintf((char *) port, 10, "%d", uri->port);
1050	ret = xmlStrcat(ret, BAD_CAST ":");
1051	ret = xmlStrcat(ret, port);
1052	}
1053
1054	if (uri->path) {
1055	segment =
1056	xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1057	NULLCHK(segment)
1058	ret = xmlStrcat(ret, segment);
1059	xmlFree(segment);
1060	}
1061
1062	if (uri->query_raw) {
1063	ret = xmlStrcat(ret, BAD_CAST "?");
1064	ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1065	}
1066	else if (uri->query) {
1067	segment =
1068	xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1069	NULLCHK(segment)
1070	ret = xmlStrcat(ret, BAD_CAST "?");
1071	ret = xmlStrcat(ret, segment);
1072	xmlFree(segment);
1073	}
1074
1075	if (uri->opaque) {
1076	segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1077	NULLCHK(segment)
1078	ret = xmlStrcat(ret, segment);
1079	xmlFree(segment);
1080	}
1081
1082	if (uri->fragment) {
1083	segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1084	NULLCHK(segment)
1085	ret = xmlStrcat(ret, BAD_CAST "#");
1086	ret = xmlStrcat(ret, segment);
1087	xmlFree(segment);
1088	}
1089
1090	xmlFreeURI(uri);
1091	#undef NULLCHK
1092
1093	return (ret);
1094	}
1095
1096	/************************************************************************
1097	* *
1098	* Escaped URI parsing *
1099	* *
1100	************************************************************************/
1101
1102	/**
1103	* xmlParseURIFragment:
1104	* @uri: pointer to an URI structure
1105	* @str: pointer to the string to analyze
1106	*
1107	* Parse an URI fragment string and fills in the appropriate fields
1108	* of the @uri structure.
1109	*
1110	* fragment = *uric
1111	*
1112	* Returns 0 or the error code
1113	*/
1114	static int
1115	xmlParseURIFragment(xmlURIPtr uri, const char **str)
1116	{
1117	const char *cur;
1118
1119	if (str == NULL)
1120	return (-1);
1121
1122	cur = *str;
1123
1124	while (IS_URIC(cur) \|\| IS_UNWISE(cur))
1125	NEXT(cur);
1126	if (uri != NULL) {
1127	if (uri->fragment != NULL)
1128	xmlFree(uri->fragment);
1129	if (uri->cleanup & 2)
1130	uri->fragment = STRNDUP(str, cur - str);
1131	else
1132	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
1133	}
1134	*str = cur;
1135	return (0);
1136	}
1137
1138	/**
1139	* xmlParseURIQuery:
1140	* @uri: pointer to an URI structure
1141	* @str: pointer to the string to analyze
1142	*
1143	* Parse the query part of an URI
1144	*
1145	* query = *uric
1146	*
1147	* Returns 0 or the error code
1148	*/
1149	static int
1150	xmlParseURIQuery(xmlURIPtr uri, const char **str)
1151	{
1152	const char *cur;
1153
1154	if (str == NULL)
1155	return (-1);
1156
1157	cur = *str;
1158
1159	while ((IS_URIC(cur)) \|\|
1160	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1161	NEXT(cur);
1162	if (uri != NULL) {
1163	if (uri->query != NULL)
1164	xmlFree(uri->query);
1165	if (uri->cleanup & 2)
1166	uri->query = STRNDUP(str, cur - str);
1167	else
1168	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
1169
1170	/* Save the raw bytes of the query as well.
1171	* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
1172	*/
1173	if (uri->query_raw != NULL)
1174	xmlFree (uri->query_raw);
1175	uri->query_raw = STRNDUP (str, cur - str);
1176	}
1177	*str = cur;
1178	return (0);
1179	}
1180
1181	/**
1182	* xmlParseURIScheme:
1183	* @uri: pointer to an URI structure
1184	* @str: pointer to the string to analyze
1185	*
1186	* Parse an URI scheme
1187	*
1188	* scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
1189	*
1190	* Returns 0 or the error code
1191	*/
1192	static int
1193	xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1194	const char *cur;
1195
1196	if (str == NULL)
1197	return(-1);
1198
1199	cur = *str;
1200	if (!IS_ALPHA(*cur))
1201	return(2);
1202	cur++;
1203	while (IS_SCHEME(*cur)) cur++;
1204	if (uri != NULL) {
1205	if (uri->scheme != NULL) xmlFree(uri->scheme);
1206	uri->scheme = STRNDUP(str, cur - str);
1207	}
1208	*str = cur;
1209	return(0);
1210	}
1211
1212	/**
1213	* xmlParseURIOpaquePart:
1214	* @uri: pointer to an URI structure
1215	* @str: pointer to the string to analyze
1216	*
1217	* Parse an URI opaque part
1218	*
1219	* opaque_part = uric_no_slash *uric
1220	*
1221	* Returns 0 or the error code
1222	*/
1223	static int
1224	xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1225	{
1226	const char *cur;
1227
1228	if (str == NULL)
1229	return (-1);
1230
1231	cur = *str;
1232	if (!((IS_URIC_NO_SLASH(cur)) \|\|
1233	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1234	return (3);
1235	}
1236	NEXT(cur);
1237	while ((IS_URIC(cur)) \|\|
1238	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1239	NEXT(cur);
1240	if (uri != NULL) {
1241	if (uri->opaque != NULL)
1242	xmlFree(uri->opaque);
1243	if (uri->cleanup & 2)
1244	uri->opaque = STRNDUP(str, cur - str);
1245	else
1246	uri->opaque = xmlURIUnescapeString(str, cur - str, NULL);
1247	}
1248	*str = cur;
1249	return (0);
1250	}
1251
1252	/**
1253	* xmlParseURIServer:
1254	* @uri: pointer to an URI structure
1255	* @str: pointer to the string to analyze
1256	*
1257	* Parse a server subpart of an URI, it's a finer grain analysis
1258	* of the authority part.
1259	*
1260	* server = [ [ userinfo "@" ] hostport ]
1261	* userinfo = *( unreserved \| escaped \|
1262	* ";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )
1263	* hostport = host [ ":" port ]
1264	* host = hostname \| IPv4address \| IPv6reference
1265	* hostname = *( domainlabel "." ) toplabel [ "." ]
1266	* domainlabel = alphanum \| alphanum *( alphanum \| "-" ) alphanum
1267	* toplabel = alpha \| alpha *( alphanum \| "-" ) alphanum
1268	* IPv6reference = "[" IPv6address "]"
1269	* IPv6address = hexpart [ ":" IPv4address ]
1270	* IPv4address = 13digit "." 13digit "." 13digit "." 13digit
1271	* hexpart = hexseq \| hexseq "::" [ hexseq ]\| "::" [ hexseq ]
1272	* hexseq = hex4 *( ":" hex4)
1273	* hex4 = 1*4hexdig
1274	* port = *digit
1275	*
1276	* Returns 0 or the error code
1277	*/
1278	static int
1279	xmlParseURIServer(xmlURIPtr uri, const char **str) {
1280	const char *cur;
1281	const char host, tmp;
1282	const int IPV4max = 4;
1283	const int IPV6max = 8;
1284	int oct;
1285
1286	if (str == NULL)
1287	return(-1);
1288
1289	cur = *str;
1290
1291	/*
1292	* is there a userinfo ?
1293	*/
1294	while (IS_USERINFO(cur)) NEXT(cur);
1295	if (*cur == '@') {
1296	if (uri != NULL) {
1297	if (uri->user != NULL) xmlFree(uri->user);
1298	if (uri->cleanup & 2)
1299	uri->user = STRNDUP(str, cur - str);
1300	else
1301	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
1302	}
1303	cur++;
1304	} else {
1305	if (uri != NULL) {
1306	if (uri->user != NULL) xmlFree(uri->user);
1307	uri->user = NULL;
1308	}
1309	cur = *str;
1310	}
1311	/*
1312	* This can be empty in the case where there is no server
1313	*/
1314	host = cur;
1315	if (*cur == '/') {
1316	if (uri != NULL) {
1317	if (uri->authority != NULL) xmlFree(uri->authority);
1318	uri->authority = NULL;
1319	if (uri->server != NULL) xmlFree(uri->server);
1320	uri->server = NULL;
1321	uri->port = 0;
1322	}
1323	return(0);
1324	}
1325	/*
1326	* host part of hostport can denote an IPV4 address, an IPV6 address
1327	* or an unresolved name. Check the IP first, its easier to detect
1328	* errors if wrong one.
1329	* An IPV6 address must start with a '[' and end with a ']'.
1330	*/
1331	if (*cur == '[') {
1332	int compress=0;
1333	cur++;
1334	for (oct = 0; oct < IPV6max; ++oct) {
1335	if (*cur == ':') {
1336	if (compress)
1337	return(3); /* multiple compression attempted */
1338	if (!oct) { /* initial char is compression */
1339	if (*++cur != ':')
1340	return(3);
1341	}
1342	compress = 1; /* set compression-encountered flag */
1343	cur++; /* skip over the second ':' */
1344	continue;
1345	}
1346	while(IS_HEX(*cur)) cur++;
1347	if (oct == (IPV6max-1))
1348	continue;
1349	if (*cur != ':')
1350	break;
1351	cur++;
1352	}
1353	if ((!compress) && (oct != IPV6max))
1354	return(3);
1355	if (*cur != ']')
1356	return(3);
1357	if (uri != NULL) {
1358	if (uri->server != NULL) xmlFree(uri->server);
1359	uri->server = (char )xmlStrndup((xmlChar )host+1,
1360	(cur-host)-1);
1361	}
1362	cur++;
1363	} else {
1364	/*
1365	* Not IPV6, maybe IPV4
1366	*/
1367	for (oct = 0; oct < IPV4max; ++oct) {
1368	if (*cur == '.')
1369	return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1370	while(IS_DIGIT(*cur)) cur++;
1371	if (oct == (IPV4max-1))
1372	continue;
1373	if (*cur != '.')
1374	break;
1375	cur++;
1376	}
1377	}
1378	if ((host[0] != '[') && (oct < IPV4max \|\| (*cur == '.' && cur++) \|\|
1379	IS_ALPHA(*cur))) {
1380	/* maybe host_name */
1381	if (!IS_ALPHANUM(*cur))
1382	return(4); /* e.g. http://xml.$oft */
1383	do {
1384	do ++cur; while (IS_ALPHANUM(*cur));
1385	if (*cur == '-') {
1386	--cur;
1387	if (*cur == '.')
1388	return(5); /* e.g. http://xml.-soft */
1389	++cur;
1390	continue;
1391	}
1392	if (*cur == '.') {
1393	--cur;
1394	if (*cur == '-')
1395	return(6); /* e.g. http://xml-.soft */
1396	if (*cur == '.')
1397	return(7); /* e.g. http://xml..soft */
1398	++cur;
1399	continue;
1400	}
1401	break;
1402	} while (1);
1403	tmp = cur;
1404	if (tmp[-1] == '.')
1405	--tmp; /* e.g. http://xml.$Oft/ */
1406	do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1407	if ((++tmp == host \|\| tmp[-1] == '.') && !IS_ALPHA(*tmp))
1408	return(8); /* e.g. http://xmlsOft.0rg/ */
1409	}
1410	if (uri != NULL) {
1411	if (uri->authority != NULL) xmlFree(uri->authority);
1412	uri->authority = NULL;
1413	if (host[0] != '[') { /* it's not an IPV6 addr */
1414	if (uri->server != NULL) xmlFree(uri->server);
1415	if (uri->cleanup & 2)
1416	uri->server = STRNDUP(host, cur - host);
1417	else
1418	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1419	}
1420	}
1421	/*
1422	* finish by checking for a port presence.
1423	*/
1424	if (*cur == ':') {
1425	cur++;
1426	if (IS_DIGIT(*cur)) {
1427	if (uri != NULL)
1428	uri->port = 0;
1429	while (IS_DIGIT(*cur)) {
1430	if (uri != NULL)
1431	uri->port = uri->port * 10 + (*cur - '0');
1432	cur++;
1433	}
1434	}
1435	}
1436	*str = cur;
1437	return(0);
1438	}
1439
1440	/**
1441	* xmlParseURIRelSegment:
1442	* @uri: pointer to an URI structure
1443	* @str: pointer to the string to analyze
1444	*
1445	* Parse an URI relative segment
1446	*
1447	* rel_segment = 1*( unreserved \| escaped \| ";" \| "@" \| "&" \| "=" \|
1448	* "+" \| "$" \| "," )
1449	*
1450	* Returns 0 or the error code
1451	*/
1452	static int
1453	xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1454	{
1455	const char *cur;
1456
1457	if (str == NULL)
1458	return (-1);
1459
1460	cur = *str;
1461	if (!((IS_SEGMENT(cur)) \|\|
1462	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1463	return (3);
1464	}
1465	NEXT(cur);
1466	while ((IS_SEGMENT(cur)) \|\|
1467	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1468	NEXT(cur);
1469	if (uri != NULL) {
1470	if (uri->path != NULL)
1471	xmlFree(uri->path);
1472	if (uri->cleanup & 2)
1473	uri->path = STRNDUP(str, cur - str);
1474	else
1475	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
1476	}
1477	*str = cur;
1478	return (0);
1479	}
1480
1481	/**
1482	* xmlParseURIPathSegments:
1483	* @uri: pointer to an URI structure
1484	* @str: pointer to the string to analyze
1485	* @slash: should we add a leading slash
1486	*
1487	* Parse an URI set of path segments
1488	*
1489	* path_segments = segment *( "/" segment )
1490	* segment = pchar ( ";" param )
1491	* param = *pchar
1492	*
1493	* Returns 0 or the error code
1494	*/
1495	static int
1496	xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1497	{
1498	const char *cur;
1499
1500	if (str == NULL)
1501	return (-1);
1502
1503	cur = *str;
1504
1505	do {
1506	while ((IS_PCHAR(cur)) \|\|
1507	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1508	NEXT(cur);
1509	while (*cur == ';') {
1510	cur++;
1511	while ((IS_PCHAR(cur)) \|\|
1512	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1513	NEXT(cur);
1514	}
1515	if (*cur != '/')
1516	break;
1517	cur++;
1518	} while (1);
1519	if (uri != NULL) {
1520	int len, len2 = 0;
1521	char *path;
1522
1523	/*
1524	* Concat the set of path segments to the current path
1525	*/
1526	len = cur - *str;
1527	if (slash)
1528	len++;
1529
1530	if (uri->path != NULL) {
1531	len2 = strlen(uri->path);
1532	len += len2;
1533	}
1534	path = (char *) xmlMallocAtomic(len + 1);
1535	if (path == NULL) {
1536	xmlGenericError(xmlGenericErrorContext,
1537	"xmlParseURIPathSegments: out of memory\n");
1538	*str = cur;
1539	return (-1);
1540	}
1541	if (uri->path != NULL)
1542	memcpy(path, uri->path, len2);
1543	if (slash) {
1544	path[len2] = '/';
1545	len2++;
1546	}
1547	path[len2] = 0;
1548	if (cur - *str > 0) {
1549	if (uri->cleanup & 2) {
1550	memcpy(&path[len2], str, cur - str);
1551	path[len2 + (cur - *str)] = 0;
1552	} else
1553	xmlURIUnescapeString(str, cur - str, &path[len2]);
1554	}
1555	if (uri->path != NULL)
1556	xmlFree(uri->path);
1557	uri->path = path;
1558	}
1559	*str = cur;
1560	return (0);
1561	}
1562
1563	/**
1564	* xmlParseURIAuthority:
1565	* @uri: pointer to an URI structure
1566	* @str: pointer to the string to analyze
1567	*
1568	* Parse the authority part of an URI.
1569	*
1570	* authority = server \| reg_name
1571	* server = [ [ userinfo "@" ] hostport ]
1572	* reg_name = 1*( unreserved \| escaped \| "$" \| "," \| ";" \| ":" \|
1573	* "@" \| "&" \| "=" \| "+" )
1574	*
1575	* Note : this is completely ambiguous since reg_name is allowed to
1576	* use the full set of chars in use by server:
1577	*
1578	* 3.2.1. Registry-based Naming Authority
1579	*
1580	* The structure of a registry-based naming authority is specific
1581	* to the URI scheme, but constrained to the allowed characters
1582	* for an authority component.
1583	*
1584	* Returns 0 or the error code
1585	*/
1586	static int
1587	xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1588	const char *cur;
1589	int ret;
1590
1591	if (str == NULL)
1592	return(-1);
1593
1594	cur = *str;
1595
1596	/*
1597	* try first to parse it as a server string.
1598	*/
1599	ret = xmlParseURIServer(uri, str);
1600	if ((ret == 0) && (*str != NULL) &&
1601	((str == 0) \|\| (str == '/') \|\| (**str == '?')))
1602	return(0);
1603	*str = cur;
1604
1605	/*
1606	* failed, fallback to reg_name
1607	*/
1608	if (!IS_REG_NAME(cur)) {
1609	return(5);
1610	}
1611	NEXT(cur);
1612	while (IS_REG_NAME(cur)) NEXT(cur);
1613	if (uri != NULL) {
1614	if (uri->server != NULL) xmlFree(uri->server);
1615	uri->server = NULL;
1616	if (uri->user != NULL) xmlFree(uri->user);
1617	uri->user = NULL;
1618	if (uri->authority != NULL) xmlFree(uri->authority);
1619	if (uri->cleanup & 2)
1620	uri->authority = STRNDUP(str, cur - str);
1621	else
1622	uri->authority = xmlURIUnescapeString(str, cur - str, NULL);
1623	}
1624	*str = cur;
1625	return(0);
1626	}
1627
1628	/**
1629	* xmlParseURIHierPart:
1630	* @uri: pointer to an URI structure
1631	* @str: pointer to the string to analyze
1632	*
1633	* Parse an URI hierarchical part
1634	*
1635	* hier_part = ( net_path \| abs_path ) [ "?" query ]
1636	* abs_path = "/" path_segments
1637	* net_path = "//" authority [ abs_path ]
1638	*
1639	* Returns 0 or the error code
1640	*/
1641	static int
1642	xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1643	int ret;
1644	const char *cur;
1645
1646	if (str == NULL)
1647	return(-1);
1648
1649	cur = *str;
1650
1651	if ((cur[0] == '/') && (cur[1] == '/')) {
1652	cur += 2;
1653	ret = xmlParseURIAuthority(uri, &cur);
1654	if (ret != 0)
1655	return(ret);
1656	if (cur[0] == '/') {
1657	cur++;
1658	ret = xmlParseURIPathSegments(uri, &cur, 1);
1659	}
1660	} else if (cur[0] == '/') {
1661	cur++;
1662	ret = xmlParseURIPathSegments(uri, &cur, 1);
1663	} else {
1664	return(4);
1665	}
1666	if (ret != 0)
1667	return(ret);
1668	if (*cur == '?') {
1669	cur++;
1670	ret = xmlParseURIQuery(uri, &cur);
1671	if (ret != 0)
1672	return(ret);
1673	}
1674	*str = cur;
1675	return(0);
1676	}
1677
1678	/**
1679	* xmlParseAbsoluteURI:
1680	* @uri: pointer to an URI structure
1681	* @str: pointer to the string to analyze
1682	*
1683	* Parse an URI reference string and fills in the appropriate fields
1684	* of the @uri structure
1685	*
1686	* absoluteURI = scheme ":" ( hier_part \| opaque_part )
1687	*
1688	* Returns 0 or the error code
1689	*/
1690	static int
1691	xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1692	int ret;
1693	const char *cur;
1694
1695	if (str == NULL)
1696	return(-1);
1697
1698	cur = *str;
1699
1700	ret = xmlParseURIScheme(uri, str);
1701	if (ret != 0) return(ret);
1702	if (**str != ':') {
1703	*str = cur;
1704	return(1);
1705	}
1706	(*str)++;
1707	if (**str == '/')
1708	return(xmlParseURIHierPart(uri, str));
1709	return(xmlParseURIOpaquePart(uri, str));
1710	}
1711
1712	/**
1713	* xmlParseRelativeURI:
1714	* @uri: pointer to an URI structure
1715	* @str: pointer to the string to analyze
1716	*
1717	* Parse an relative URI string and fills in the appropriate fields
1718	* of the @uri structure
1719	*
1720	* relativeURI = ( net_path \| abs_path \| rel_path ) [ "?" query ]
1721	* abs_path = "/" path_segments
1722	* net_path = "//" authority [ abs_path ]
1723	* rel_path = rel_segment [ abs_path ]
1724	*
1725	* Returns 0 or the error code
1726	*/
1727	static int
1728	xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1729	int ret = 0;
1730	const char *cur;
1731
1732	if (str == NULL)
1733	return(-1);
1734
1735	cur = *str;
1736	if ((cur[0] == '/') && (cur[1] == '/')) {
1737	cur += 2;
1738	ret = xmlParseURIAuthority(uri, &cur);
1739	if (ret != 0)
1740	return(ret);
1741	if (cur[0] == '/') {
1742	cur++;
1743	ret = xmlParseURIPathSegments(uri, &cur, 1);
1744	}
1745	} else if (cur[0] == '/') {
1746	cur++;
1747	ret = xmlParseURIPathSegments(uri, &cur, 1);
1748	} else if (cur[0] != '#' && cur[0] != '?') {
1749	ret = xmlParseURIRelSegment(uri, &cur);
1750	if (ret != 0)
1751	return(ret);
1752	if (cur[0] == '/') {
1753	cur++;
1754	ret = xmlParseURIPathSegments(uri, &cur, 1);
1755	}
1756	}
1757	if (ret != 0)
1758	return(ret);
1759	if (*cur == '?') {
1760	cur++;
1761	ret = xmlParseURIQuery(uri, &cur);
1762	if (ret != 0)
1763	return(ret);
1764	}
1765	*str = cur;
1766	return(ret);
1767	}
1768
1769	/**
1770	* xmlParseURIReference:
1771	* @uri: pointer to an URI structure
1772	* @str: the string to analyze
1773	*
1774	* Parse an URI reference string and fills in the appropriate fields
1775	* of the @uri structure
1776	*
1777	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
1778	*
1779	* Returns 0 or the error code
1780	*/
1781	int
1782	xmlParseURIReference(xmlURIPtr uri, const char *str) {
1783	int ret;
1784	const char *tmp = str;
1785
1786	if (str == NULL)
1787	return(-1);
1788	xmlCleanURI(uri);
1789
1790	/*
1791	* Try first to parse absolute refs, then fallback to relative if
1792	* it fails.
1793	*/
1794	ret = xmlParseAbsoluteURI(uri, &str);
1795	if (ret != 0) {
1796	xmlCleanURI(uri);
1797	str = tmp;
1798	ret = xmlParseRelativeURI(uri, &str);
1799	}
1800	if (ret != 0) {
1801	xmlCleanURI(uri);
1802	return(ret);
1803	}
1804
1805	if (*str == '#') {
1806	str++;
1807	ret = xmlParseURIFragment(uri, &str);
1808	if (ret != 0) return(ret);
1809	}
1810	if (*str != 0) {
1811	xmlCleanURI(uri);
1812	return(1);
1813	}
1814	return(0);
1815	}
1816
1817	/**
1818	* xmlParseURI:
1819	* @str: the URI string to analyze
1820	*
1821	* Parse an URI
1822	*
1823	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
1824	*
1825	* Returns a newly built xmlURIPtr or NULL in case of error
1826	*/
1827	xmlURIPtr
1828	xmlParseURI(const char *str) {
1829	xmlURIPtr uri;
1830	int ret;
1831
1832	if (str == NULL)
1833	return(NULL);
1834	uri = xmlCreateURI();
1835	if (uri != NULL) {
1836	ret = xmlParseURIReference(uri, str);
1837	if (ret) {
1838	xmlFreeURI(uri);
1839	return(NULL);
1840	}
1841	}
1842	return(uri);
1843	}
1844
1845	/**
1846	* xmlParseURIRaw:
1847	* @str: the URI string to analyze
1848	* @raw: if 1 unescaping of URI pieces are disabled
1849	*
1850	* Parse an URI but allows to keep intact the original fragments.
1851	*
1852	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
1853	*
1854	* Returns a newly built xmlURIPtr or NULL in case of error
1855	*/
1856	xmlURIPtr
1857	xmlParseURIRaw(const char *str, int raw) {
1858	xmlURIPtr uri;
1859	int ret;
1860
1861	if (str == NULL)
1862	return(NULL);
1863	uri = xmlCreateURI();
1864	if (uri != NULL) {
1865	if (raw) {
1866	uri->cleanup \|= 2;
1867	}
1868	ret = xmlParseURIReference(uri, str);
1869	if (ret) {
1870	xmlFreeURI(uri);
1871	return(NULL);
1872	}
1873	}
1874	return(uri);
1875	}
1876
1877	/************************************************************************
1878	* *
1879	* Public functions *
1880	* *
1881	************************************************************************/
1882
1883	/**
1884	* xmlBuildURI:
1885	* @URI: the URI instance found in the document
1886	* @base: the base value
1887	*
1888	* Computes he final URI of the reference done by checking that
1889	* the given URI is valid, and building the final URI using the
1890	* base URI. This is processed according to section 5.2 of the
1891	* RFC 2396
1892	*
1893	* 5.2. Resolving Relative References to Absolute Form
1894	*
1895	* Returns a new URI string (to be freed by the caller) or NULL in case
1896	* of error.
1897	*/
1898	xmlChar *
1899	xmlBuildURI(const xmlChar URI, const xmlChar base) {
1900	xmlChar *val = NULL;
1901	int ret, len, indx, cur, out;
1902	xmlURIPtr ref = NULL;
1903	xmlURIPtr bas = NULL;
1904	xmlURIPtr res = NULL;
1905
1906	/*
1907	* 1) The URI reference is parsed into the potential four components and
1908	* fragment identifier, as described in Section 4.3.
1909	*
1910	* NOTE that a completely empty URI is treated by modern browsers
1911	* as a reference to "." rather than as a synonym for the current
1912	* URI. Should we do that here?
1913	*/
1914	if (URI == NULL)
1915	ret = -1;
1916	else {
1917	if (*URI) {
1918	ref = xmlCreateURI();
1919	if (ref == NULL)
1920	goto done;
1921	ret = xmlParseURIReference(ref, (const char *) URI);
1922	}
1923	else
1924	ret = 0;
1925	}
1926	if (ret != 0)
1927	goto done;
1928	if ((ref != NULL) && (ref->scheme != NULL)) {
1929	/*
1930	* The URI is absolute don't modify.
1931	*/
1932	val = xmlStrdup(URI);
1933	goto done;
1934	}
1935	if (base == NULL)
1936	ret = -1;
1937	else {
1938	bas = xmlCreateURI();
1939	if (bas == NULL)
1940	goto done;
1941	ret = xmlParseURIReference(bas, (const char *) base);
1942	}
1943	if (ret != 0) {
1944	if (ref)
1945	val = xmlSaveUri(ref);
1946	goto done;
1947	}
1948	if (ref == NULL) {
1949	/*
1950	* the base fragment must be ignored
1951	*/
1952	if (bas->fragment != NULL) {
1953	xmlFree(bas->fragment);
1954	bas->fragment = NULL;
1955	}
1956	val = xmlSaveUri(bas);
1957	goto done;
1958	}
1959
1960	/*
1961	* 2) If the path component is empty and the scheme, authority, and
1962	* query components are undefined, then it is a reference to the
1963	* current document and we are done. Otherwise, the reference URI's
1964	* query and fragment components are defined as found (or not found)
1965	* within the URI reference and not inherited from the base URI.
1966	*
1967	* NOTE that in modern browsers, the parsing differs from the above
1968	* in the following aspect: the query component is allowed to be
1969	* defined while still treating this as a reference to the current
1970	* document.
1971	*/
1972	res = xmlCreateURI();
1973	if (res == NULL)
1974	goto done;
1975	if ((ref->scheme == NULL) && (ref->path == NULL) &&
1976	((ref->authority == NULL) && (ref->server == NULL))) {
1977	if (bas->scheme != NULL)
1978	res->scheme = xmlMemStrdup(bas->scheme);
1979	if (bas->authority != NULL)
1980	res->authority = xmlMemStrdup(bas->authority);
1981	else if (bas->server != NULL) {
1982	res->server = xmlMemStrdup(bas->server);
1983	if (bas->user != NULL)
1984	res->user = xmlMemStrdup(bas->user);
1985	res->port = bas->port;
1986	}
1987	if (bas->path != NULL)
1988	res->path = xmlMemStrdup(bas->path);
1989	if (ref->query_raw != NULL)
1990	res->query_raw = xmlMemStrdup (ref->query_raw);
1991	else if (ref->query != NULL)
1992	res->query = xmlMemStrdup(ref->query);
1993	else if (bas->query_raw != NULL)
1994	res->query_raw = xmlMemStrdup(bas->query_raw);
1995	else if (bas->query != NULL)
1996	res->query = xmlMemStrdup(bas->query);
1997	if (ref->fragment != NULL)
1998	res->fragment = xmlMemStrdup(ref->fragment);
1999	goto step_7;
2000	}
2001
2002	/*
2003	* 3) If the scheme component is defined, indicating that the reference
2004	* starts with a scheme name, then the reference is interpreted as an
2005	* absolute URI and we are done. Otherwise, the reference URI's
2006	* scheme is inherited from the base URI's scheme component.
2007	*/
2008	if (ref->scheme != NULL) {
2009	val = xmlSaveUri(ref);
2010	goto done;
2011	}
2012	if (bas->scheme != NULL)
2013	res->scheme = xmlMemStrdup(bas->scheme);
2014
2015	if (ref->query_raw != NULL)
2016	res->query_raw = xmlMemStrdup(ref->query_raw);
2017	else if (ref->query != NULL)
2018	res->query = xmlMemStrdup(ref->query);
2019	if (ref->fragment != NULL)
2020	res->fragment = xmlMemStrdup(ref->fragment);
2021
2022	/*
2023	* 4) If the authority component is defined, then the reference is a
2024	* network-path and we skip to step 7. Otherwise, the reference
2025	* URI's authority is inherited from the base URI's authority
2026	* component, which will also be undefined if the URI scheme does not
2027	* use an authority component.
2028	*/
2029	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
2030	if (ref->authority != NULL)
2031	res->authority = xmlMemStrdup(ref->authority);
2032	else {
2033	res->server = xmlMemStrdup(ref->server);
2034	if (ref->user != NULL)
2035	res->user = xmlMemStrdup(ref->user);
2036	res->port = ref->port;
2037	}
2038	if (ref->path != NULL)
2039	res->path = xmlMemStrdup(ref->path);
2040	goto step_7;
2041	}
2042	if (bas->authority != NULL)
2043	res->authority = xmlMemStrdup(bas->authority);
2044	else if (bas->server != NULL) {
2045	res->server = xmlMemStrdup(bas->server);
2046	if (bas->user != NULL)
2047	res->user = xmlMemStrdup(bas->user);
2048	res->port = bas->port;
2049	}
2050
2051	/*
2052	* 5) If the path component begins with a slash character ("/"), then
2053	* the reference is an absolute-path and we skip to step 7.
2054	*/
2055	if ((ref->path != NULL) && (ref->path[0] == '/')) {
2056	res->path = xmlMemStrdup(ref->path);
2057	goto step_7;
2058	}
2059
2060
2061	/*
2062	* 6) If this step is reached, then we are resolving a relative-path
2063	* reference. The relative path needs to be merged with the base
2064	* URI's path. Although there are many ways to do this, we will
2065	* describe a simple method using a separate string buffer.
2066	*
2067	* Allocate a buffer large enough for the result string.
2068	*/
2069	len = 2; /* extra / and 0 */
2070	if (ref->path != NULL)
2071	len += strlen(ref->path);
2072	if (bas->path != NULL)
2073	len += strlen(bas->path);
2074	res->path = (char *) xmlMallocAtomic(len);
2075	if (res->path == NULL) {
2076	xmlGenericError(xmlGenericErrorContext,
2077	"xmlBuildURI: out of memory\n");
2078	goto done;
2079	}
2080	res->path[0] = 0;
2081
2082	/*
2083	* a) All but the last segment of the base URI's path component is
2084	* copied to the buffer. In other words, any characters after the
2085	* last (right-most) slash character, if any, are excluded.
2086	*/
2087	cur = 0;
2088	out = 0;
2089	if (bas->path != NULL) {
2090	while (bas->path[cur] != 0) {
2091	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2092	cur++;
2093	if (bas->path[cur] == 0)
2094	break;
2095
2096	cur++;
2097	while (out < cur) {
2098	res->path[out] = bas->path[out];
2099	out++;
2100	}
2101	}
2102	}
2103	res->path[out] = 0;
2104
2105	/*
2106	* b) The reference's path component is appended to the buffer
2107	* string.
2108	*/
2109	if (ref->path != NULL && ref->path[0] != 0) {
2110	indx = 0;
2111	/*
2112	* Ensure the path includes a '/'
2113	*/
2114	if ((out == 0) && (bas->server != NULL))
2115	res->path[out++] = '/';
2116	while (ref->path[indx] != 0) {
2117	res->path[out++] = ref->path[indx++];
2118	}
2119	}
2120	res->path[out] = 0;
2121
2122	/*
2123	* Steps c) to h) are really path normalization steps
2124	*/
2125	xmlNormalizeURIPath(res->path);
2126
2127	step_7:
2128
2129	/*
2130	* 7) The resulting URI components, including any inherited from the
2131	* base URI, are recombined to give the absolute form of the URI
2132	* reference.
2133	*/
2134	val = xmlSaveUri(res);
2135
2136	done:
2137	if (ref != NULL)
2138	xmlFreeURI(ref);
2139	if (bas != NULL)
2140	xmlFreeURI(bas);
2141	if (res != NULL)
2142	xmlFreeURI(res);
2143	return(val);
2144	}
2145
2146	/**
2147	* xmlBuildRelativeURI:
2148	* @URI: the URI reference under consideration
2149	* @base: the base value
2150	*
2151	* Expresses the URI of the reference in terms relative to the
2152	* base. Some examples of this operation include:
2153	* base = "http://site1.com/docs/book1.html"
2154	* URI input URI returned
2155	* docs/pic1.gif pic1.gif
2156	* docs/img/pic1.gif img/pic1.gif
2157	* img/pic1.gif ../img/pic1.gif
2158	* http://site1.com/docs/pic1.gif pic1.gif
2159	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2160	*
2161	* base = "docs/book1.html"
2162	* URI input URI returned
2163	* docs/pic1.gif pic1.gif
2164	* docs/img/pic1.gif img/pic1.gif
2165	* img/pic1.gif ../img/pic1.gif
2166	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2167	*
2168	*
2169	* Note: if the URI reference is really wierd or complicated, it may be
2170	* worthwhile to first convert it into a "nice" one by calling
2171	* xmlBuildURI (using 'base') before calling this routine,
2172	* since this routine (for reasonable efficiency) assumes URI has
2173	* already been through some validation.
2174	*
2175	* Returns a new URI string (to be freed by the caller) or NULL in case
2176	* error.
2177	*/
2178	xmlChar *
2179	xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2180	{
2181	xmlChar *val = NULL;
2182	int ret;
2183	int ix;
2184	int pos = 0;
2185	int nbslash = 0;
2186	int len;
2187	xmlURIPtr ref = NULL;
2188	xmlURIPtr bas = NULL;
2189	xmlChar bptr, uptr, *vptr;
2190	int remove_path = 0;
2191
2192	if ((URI == NULL) \|\| (*URI == 0))
2193	return NULL;
2194
2195	/*
2196	* First parse URI into a standard form
2197	*/
2198	ref = xmlCreateURI ();
2199	if (ref == NULL)
2200	return NULL;
2201	/* If URI not already in "relative" form */
2202	if (URI[0] != '.') {
2203	ret = xmlParseURIReference (ref, (const char *) URI);
2204	if (ret != 0)
2205	goto done; /* Error in URI, return NULL */
2206	} else
2207	ref->path = (char *)xmlStrdup(URI);
2208
2209	/*
2210	* Next parse base into the same standard form
2211	*/
2212	if ((base == NULL) \|\| (*base == 0)) {
2213	val = xmlStrdup (URI);
2214	goto done;
2215	}
2216	bas = xmlCreateURI ();
2217	if (bas == NULL)
2218	goto done;
2219	if (base[0] != '.') {
2220	ret = xmlParseURIReference (bas, (const char *) base);
2221	if (ret != 0)
2222	goto done; /* Error in base, return NULL */
2223	} else
2224	bas->path = (char *)xmlStrdup(base);
2225
2226	/*
2227	* If the scheme / server on the URI differs from the base,
2228	* just return the URI
2229	*/
2230	if ((ref->scheme != NULL) &&
2231	((bas->scheme == NULL) \|\|
2232	(xmlStrcmp ((xmlChar )bas->scheme, (xmlChar )ref->scheme)) \|\|
2233	(xmlStrcmp ((xmlChar )bas->server, (xmlChar )ref->server)))) {
2234	val = xmlStrdup (URI);
2235	goto done;
2236	}
2237	if (xmlStrEqual((xmlChar )bas->path, (xmlChar )ref->path)) {
2238	val = xmlStrdup(BAD_CAST "");
2239	goto done;
2240	}
2241	if (bas->path == NULL) {
2242	val = xmlStrdup((xmlChar *)ref->path);
2243	goto done;
2244	}
2245	if (ref->path == NULL) {
2246	ref->path = (char *) "/";
2247	remove_path = 1;
2248	}
2249
2250	/*
2251	* At this point (at last!) we can compare the two paths
2252	*
2253	* First we take care of the special case where either of the
2254	* two path components may be missing (bug 316224)
2255	*/
2256	if (bas->path == NULL) {
2257	if (ref->path != NULL) {
2258	uptr = (xmlChar *) ref->path;
2259	if (*uptr == '/')
2260	uptr++;
2261	/* exception characters from xmlSaveUri */
2262	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2263	}
2264	goto done;
2265	}
2266	bptr = (xmlChar *)bas->path;
2267	if (ref->path == NULL) {
2268	for (ix = 0; bptr[ix] != 0; ix++) {
2269	if (bptr[ix] == '/')
2270	nbslash++;
2271	}
2272	uptr = NULL;
2273	len = 1; /* this is for a string terminator only */
2274	} else {
2275	/*
2276	* Next we compare the two strings and find where they first differ
2277	*/
2278	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2279	pos += 2;
2280	if ((*bptr == '.') && (bptr[1] == '/'))
2281	bptr += 2;
2282	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2283	bptr++;
2284	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2285	pos++;
2286
2287	if (bptr[pos] == ref->path[pos]) {
2288	val = xmlStrdup(BAD_CAST "");
2289	goto done; /* (I can't imagine why anyone would do this) */
2290	}
2291
2292	/*
2293	* In URI, "back up" to the last '/' encountered. This will be the
2294	* beginning of the "unique" suffix of URI
2295	*/
2296	ix = pos;
2297	if ((ref->path[ix] == '/') && (ix > 0))
2298	ix--;
2299	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2300	ix -= 2;
2301	for (; ix > 0; ix--) {
2302	if (ref->path[ix] == '/')
2303	break;
2304	}
2305	if (ix == 0) {
2306	uptr = (xmlChar *)ref->path;
2307	} else {
2308	ix++;
2309	uptr = (xmlChar *)&ref->path[ix];
2310	}
2311
2312	/*
2313	* In base, count the number of '/' from the differing point
2314	*/
2315	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2316	for (; bptr[ix] != 0; ix++) {
2317	if (bptr[ix] == '/')
2318	nbslash++;
2319	}
2320	}
2321	len = xmlStrlen (uptr) + 1;
2322	}
2323
2324	if (nbslash == 0) {
2325	if (uptr != NULL)
2326	/* exception characters from xmlSaveUri */
2327	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2328	goto done;
2329	}
2330
2331	/*
2332	* Allocate just enough space for the returned string -
2333	* length of the remainder of the URI, plus enough space
2334	* for the "../" groups, plus one for the terminator
2335	*/
2336	val = (xmlChar ) xmlMalloc (len + 3 nbslash);
2337	if (val == NULL) {
2338	xmlGenericError(xmlGenericErrorContext,
2339	"xmlBuildRelativeURI: out of memory\n");
2340	goto done;
2341	}
2342	vptr = val;
2343	/*
2344	* Put in as many "../" as needed
2345	*/
2346	for (; nbslash>0; nbslash--) {
2347	*vptr++ = '.';
2348	*vptr++ = '.';
2349	*vptr++ = '/';
2350	}
2351	/*
2352	* Finish up with the end of the URI
2353	*/
2354	if (uptr != NULL) {
2355	if ((vptr > val) && (len > 0) &&
2356	(uptr[0] == '/') && (vptr[-1] == '/')) {
2357	memcpy (vptr, uptr + 1, len - 1);
2358	vptr[len - 2] = 0;
2359	} else {
2360	memcpy (vptr, uptr, len);
2361	vptr[len - 1] = 0;
2362	}
2363	} else {
2364	vptr[len - 1] = 0;
2365	}
2366
2367	/* escape the freshly-built path */
2368	vptr = val;
2369	/* exception characters from xmlSaveUri */
2370	val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2371	xmlFree(vptr);
2372
2373	done:
2374	/*
2375	* Free the working variables
2376	*/
2377	if (remove_path != 0)
2378	ref->path = NULL;
2379	if (ref != NULL)
2380	xmlFreeURI (ref);
2381	if (bas != NULL)
2382	xmlFreeURI (bas);
2383
2384	return val;
2385	}
2386
2387	/**
2388	* xmlCanonicPath:
2389	* @path: the resource locator in a filesystem notation
2390	*
2391	* Constructs a canonic path from the specified path.
2392	*
2393	* Returns a new canonic path, or a duplicate of the path parameter if the
2394	* construction fails. The caller is responsible for freeing the memory occupied
2395	* by the returned string. If there is insufficient memory available, or the
2396	* argument is NULL, the function returns NULL.
2397	*/
2398	#define IS_WINDOWS_PATH(p) \
2399	((p != NULL) && \
2400	(((p[0] >= 'a') && (p[0] <= 'z')) \|\| \
2401	((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2402	(p[1] == ':') && ((p[2] == '/') \|\| (p[2] == '\\')))
2403	xmlChar *
2404	xmlCanonicPath(const xmlChar *path)
2405	{
2406	/*
2407	* For Windows implementations, additional work needs to be done to
2408	* replace backslashes in pathnames with "forward slashes"
2409	*/
2410	#if defined(_WIN32) && !defined(__CYGWIN__)
2411	int len = 0;
2412	int i = 0;
2413	xmlChar *p = NULL;
2414	#endif
2415	xmlURIPtr uri;
2416	xmlChar *ret;
2417	const xmlChar *absuri;
2418
2419	if (path == NULL)
2420	return(NULL);
2421	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2422	xmlFreeURI(uri);
2423	return xmlStrdup(path);
2424	}
2425
2426	/* Check if this is an "absolute uri" */
2427	absuri = xmlStrstr(path, BAD_CAST "://");
2428	if (absuri != NULL) {
2429	int l, j;
2430	unsigned char c;
2431	xmlChar *escURI;
2432
2433	/*
2434	* this looks like an URI where some parts have not been
2435	* escaped leading to a parsing problem. Check that the first
2436	* part matches a protocol.
2437	*/
2438	l = absuri - path;
2439	/* Bypass if first part (part before the '://') is > 20 chars */
2440	if ((l <= 0) \|\| (l > 20))
2441	goto path_processing;
2442	/* Bypass if any non-alpha characters are present in first part */
2443	for (j = 0;j < l;j++) {
2444	c = path[j];
2445	if (!(((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z'))))
2446	goto path_processing;
2447	}
2448
2449	/* Escape all except the characters specified in the supplied path */
2450	escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2451	if (escURI != NULL) {
2452	/* Try parsing the escaped path */
2453	uri = xmlParseURI((const char *) escURI);
2454	/* If successful, return the escaped string */
2455	if (uri != NULL) {
2456	xmlFreeURI(uri);
2457	return escURI;
2458	}
2459	}
2460	}
2461
2462	path_processing:
2463	/* For Windows implementations, replace backslashes with 'forward slashes' */
2464	#if defined(_WIN32) && !defined(__CYGWIN__)
2465	/*
2466	* Create a URI structure
2467	*/
2468	uri = xmlCreateURI();
2469	if (uri == NULL) { /* Guard against 'out of memory' */
2470	return(NULL);
2471	}
2472
2473	len = xmlStrlen(path);
2474	if ((len > 2) && IS_WINDOWS_PATH(path)) {
2475	/* make the scheme 'file' */
2476	uri->scheme = xmlStrdup(BAD_CAST "file");
2477	/* allocate space for leading '/' + path + string terminator */
2478	uri->path = xmlMallocAtomic(len + 2);
2479	if (uri->path == NULL) {
2480	xmlFreeURI(uri); /* Guard agains 'out of memory' */
2481	return(NULL);
2482	}
2483	/* Put in leading '/' plus path */
2484	uri->path[0] = '/';
2485	p = uri->path + 1;
2486	strncpy(p, path, len + 1);
2487	} else {
2488	uri->path = xmlStrdup(path);
2489	if (uri->path == NULL) {
2490	xmlFreeURI(uri);
2491	return(NULL);
2492	}
2493	p = uri->path;
2494	}
2495	/* Now change all occurences of '\' to '/' */
2496	while (*p != '\0') {
2497	if (*p == '\\')
2498	*p = '/';
2499	p++;
2500	}
2501
2502	if (uri->scheme == NULL) {
2503	ret = xmlStrdup((const xmlChar *) uri->path);
2504	} else {
2505	ret = xmlSaveUri(uri);
2506	}
2507
2508	xmlFreeURI(uri);
2509	#else
2510	ret = xmlStrdup((const xmlChar *) path);
2511	#endif
2512	return(ret);
2513	}
2514
2515	/**
2516	* xmlPathToURI:
2517	* @path: the resource locator in a filesystem notation
2518	*
2519	* Constructs an URI expressing the existing path
2520	*
2521	* Returns a new URI, or a duplicate of the path parameter if the
2522	* construction fails. The caller is responsible for freeing the memory
2523	* occupied by the returned string. If there is insufficient memory available,
2524	* or the argument is NULL, the function returns NULL.
2525	*/
2526	xmlChar *
2527	xmlPathToURI(const xmlChar *path)
2528	{
2529	xmlURIPtr uri;
2530	xmlURI temp;
2531	xmlChar ret, cal;
2532
2533	if (path == NULL)
2534	return(NULL);
2535
2536	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2537	xmlFreeURI(uri);
2538	return xmlStrdup(path);
2539	}
2540	cal = xmlCanonicPath(path);
2541	if (cal == NULL)
2542	return(NULL);
2543	#if defined(_WIN32) && !defined(__CYGWIN__)
2544	/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2545	If 'cal' is a valid URI allready then we are done here, as continuing would make
2546	it invalid. */
2547	if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2548	xmlFreeURI(uri);
2549	return cal;
2550	}
2551	/* 'cal' can contain a relative path with backslashes. If that is processed
2552	by xmlSaveURI, they will be escaped and the external entity loader machinery
2553	will fail. So convert them to slashes. Misuse 'ret' for walking. */
2554	ret = cal;
2555	while (*ret != '\0') {
2556	if (*ret == '\\')
2557	*ret = '/';
2558	ret++;
2559	}
2560	#endif
2561	memset(&temp, 0, sizeof(temp));
2562	temp.path = (char *) cal;
2563	ret = xmlSaveUri(&temp);
2564	xmlFree(cal);
2565	return(ret);
2566	}
2567	#define bottom_uri
2568	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.6.30/uri.c@ 35199

Download in other formats: