VirtualBox

source: vbox/trunk/src/libs/libxml2-2.13.2/uri.c@ 105770

Last change on this file since 105770 was 105420, checked in by vboxsync, 4 months ago

libxml2-2.12.6: Applied and adjusted our libxml2 changes to 2.12.6. bugref:10730

  • Property svn:eol-style set to native
File size: 69.8 KB
Line 
1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <limits.h>
15#include <string.h>
16
17#include <libxml/xmlmemory.h>
18#include <libxml/uri.h>
19#include <libxml/xmlerror.h>
20
21#include "private/error.h"
22
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above has no size limit.
 * In practice URIs are relatively short, except for the data URI
 * scheme defined in RFC 2397. Even for data URIs the usual maximum
 * size before hitting random practical limits is around 64 KB, and
 * 4 KB is usually the maximum admitted limit for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro evaluates to a single
 * value wherever it is used (e.g. as a divisor).
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value of a freshly created URI: no port has been recorded. */
#define PORT_EMPTY 0
/* Port marker stored when an authority was parsed but its server is empty. */
#define PORT_EMPTY_SERVER (-1)
39
/*
 * Forward declaration: the RFC 3986 parsing routines below call
 * xmlCleanURI() before any definition of it appears in this part of
 * the file.
 */
40static void xmlCleanURI(xmlURIPtr uri);
41
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Every macro below takes a character VALUE, except IS_UNWISE and NEXT
 * which take a POINTER to the character.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* Drop any IS_DIGIT inherited from an earlier header before redefining it. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character, unlike the other legacy classes.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||     \
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||     \
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Advance the pointer lvalue p to the next character: by three bytes when
 * it points at a '%' (an escape sequence), by one byte otherwise.
 * The argument is parenthesized so that any lvalue expression (for
 * instance *pp) is advanced correctly; p is evaluated more than once.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 *    path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup(), typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
121
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * All ISA_* macros take a POINTER to the character being tested and may
 * evaluate that argument several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                  / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||          \
     ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||           \
     ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_STRICTLY_UNRESERVED(p)                                      \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||             \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two hex digits are only inspected when the first character is '%'.
 * The argument is parenthesized before the offsets are added so that any
 * pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(u, p)                                                 \
    (ISA_UNRESERVED(u, p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||  \
     ((*(p) == ':')) || ((*(p) == '@')))

/*
 * From https://www.w3.org/TR/leiri/
 *
 * " " / "<" / ">" / '"' / "{" / "}" / "|"
 * / "\" / "^" / "`" / %x0-1F / %x7F-D7FF
 * / %xE000-FFFD / %x10000-10FFFF
 *
 * Works on single bytes: anything at or below 0x20, anything at or above
 * 0x7F, and the listed ASCII punctuation. Note that the byte value 0
 * satisfies the first comparison, so this macro is also true for a
 * string's terminating NUL.
 */
#define ISA_UCSCHAR(p)                                                  \
    ((*(p) <= 0x20) || (*(p) >= 0x7F) || (*(p) == '<') ||               \
     (*(p) == '>') || (*(p) == '"') || (*(p) == '{') ||                 \
     (*(p) == '}') || (*(p) == '|') || (*(p) == '\\') ||                \
     (*(p) == '^') || (*(p) == '`'))

/* "unreserved" as configured by the cleanup flags of URI structure u. */
#define ISA_UNRESERVED(u, p) (xmlIsUnreserved(u, p))

/* Bit flags tested against the cleanup field of an xmlURI. */
#define XML_URI_ALLOW_UNWISE    1   /* also accept IS_UNWISE characters */
#define XML_URI_NO_UNESCAPE     2   /* store components without unescaping */
#define XML_URI_ALLOW_UCSCHAR   4   /* also accept ISA_UCSCHAR characters */
195
196static int
197xmlIsUnreserved(xmlURIPtr uri, const char *cur) {
198 if (uri == NULL)
199 return(0);
200
201 if (ISA_STRICTLY_UNRESERVED(cur))
202 return(1);
203
204 if (uri->cleanup & XML_URI_ALLOW_UNWISE) {
205 if (IS_UNWISE(cur))
206 return(1);
207 } else if (uri->cleanup & XML_URI_ALLOW_UCSCHAR) {
208 if (ISA_UCSCHAR(cur))
209 return(1);
210 }
211
212 return(0);
213}
214
215/**
216 * xmlParse3986Scheme:
217 * @uri: pointer to an URI structure
218 * @str: pointer to the string to analyze
219 *
220 * Parse an URI scheme
221 *
222 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
223 *
224 * Returns 0 or the error code
225 */
226static int
227xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
228 const char *cur;
229
230 cur = *str;
231 if (!ISA_ALPHA(cur))
232 return(1);
233 cur++;
234 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
235 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
236 if (uri != NULL) {
237 if (uri->scheme != NULL) xmlFree(uri->scheme);
238 uri->scheme = STRNDUP(*str, cur - *str);
239 if (uri->scheme == NULL)
240 return(-1);
241 }
242 *str = cur;
243 return(0);
244}
245
246/**
247 * xmlParse3986Fragment:
248 * @uri: pointer to an URI structure
249 * @str: pointer to the string to analyze
250 *
251 * Parse the query part of an URI
252 *
253 * fragment = *( pchar / "/" / "?" )
254 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
255 * in the fragment identifier but this is used very broadly for
256 * xpointer scheme selection, so we are allowing it here to not break
257 * for example all the DocBook processing chains.
258 *
259 * Returns 0 or the error code
260 */
261static int
262xmlParse3986Fragment(xmlURIPtr uri, const char **str)
263{
264 const char *cur;
265
266 cur = *str;
267
268 while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?') ||
269 (*cur == '[') || (*cur == ']'))
270 NEXT(cur);
271 if (uri != NULL) {
272 if (uri->fragment != NULL)
273 xmlFree(uri->fragment);
274 if (uri->cleanup & XML_URI_NO_UNESCAPE)
275 uri->fragment = STRNDUP(*str, cur - *str);
276 else
277 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
278 if (uri->fragment == NULL)
279 return (-1);
280 }
281 *str = cur;
282 return (0);
283}
284
285/**
286 * xmlParse3986Query:
287 * @uri: pointer to an URI structure
288 * @str: pointer to the string to analyze
289 *
290 * Parse the query part of an URI
291 *
292 * query = *uric
293 *
294 * Returns 0 or the error code
295 */
296static int
297xmlParse3986Query(xmlURIPtr uri, const char **str)
298{
299 const char *cur;
300
301 cur = *str;
302
303 while ((ISA_PCHAR(uri, cur)) || (*cur == '/') || (*cur == '?'))
304 NEXT(cur);
305 if (uri != NULL) {
306 if (uri->query != NULL)
307 xmlFree(uri->query);
308 if (uri->cleanup & XML_URI_NO_UNESCAPE)
309 uri->query = STRNDUP(*str, cur - *str);
310 else
311 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
312 if (uri->query == NULL)
313 return (-1);
314
315 /* Save the raw bytes of the query as well.
316 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
317 */
318 if (uri->query_raw != NULL)
319 xmlFree (uri->query_raw);
320 uri->query_raw = STRNDUP (*str, cur - *str);
321 if (uri->query_raw == NULL)
322 return (-1);
323 }
324 *str = cur;
325 return (0);
326}
327
328/**
329 * xmlParse3986Port:
330 * @uri: pointer to an URI structure
331 * @str: the string to analyze
332 *
333 * Parse a port part and fills in the appropriate fields
334 * of the @uri structure
335 *
336 * port = *DIGIT
337 *
338 * Returns 0 or the error code
339 */
340static int
341xmlParse3986Port(xmlURIPtr uri, const char **str)
342{
343 const char *cur = *str;
344 int port = 0;
345
346 if (ISA_DIGIT(cur)) {
347 while (ISA_DIGIT(cur)) {
348 int digit = *cur - '0';
349
350 if (port > INT_MAX / 10)
351 return(1);
352 port *= 10;
353 if (port > INT_MAX - digit)
354 return(1);
355 port += digit;
356
357 cur++;
358 }
359 if (uri != NULL)
360 uri->port = port;
361 *str = cur;
362 return(0);
363 }
364 return(1);
365}
366
367/**
368 * xmlParse3986Userinfo:
369 * @uri: pointer to an URI structure
370 * @str: the string to analyze
371 *
372 * Parse an user information part and fills in the appropriate fields
373 * of the @uri structure
374 *
375 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
376 *
377 * Returns 0 or the error code
378 */
379static int
380xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
381{
382 const char *cur;
383
384 cur = *str;
385 while (ISA_UNRESERVED(uri, cur) || ISA_PCT_ENCODED(cur) ||
386 ISA_SUB_DELIM(cur) || (*cur == ':'))
387 NEXT(cur);
388 if (*cur == '@') {
389 if (uri != NULL) {
390 if (uri->user != NULL) xmlFree(uri->user);
391 if (uri->cleanup & XML_URI_NO_UNESCAPE)
392 uri->user = STRNDUP(*str, cur - *str);
393 else
394 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
395 if (uri->user == NULL)
396 return(-1);
397 }
398 *str = cur;
399 return(0);
400 }
401 return(1);
402}
403
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * The 250-255 alternative checks the THIRD digit against '5'; values
 * 256 to 259 are rejected. At most three characters are consumed, and a
 * character is only read when the one before it was a digit, so the
 * terminating NUL is never stepped over.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') &&
               ((cur[1] <= '4') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        /* 200-249 and 250-255 */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
441/**
442 * xmlParse3986Host:
443 * @uri: pointer to an URI structure
444 * @str: the string to analyze
445 *
446 * Parse an host part and fills in the appropriate fields
447 * of the @uri structure
448 *
449 * host = IP-literal / IPv4address / reg-name
450 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
451 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
452 * reg-name = *( unreserved / pct-encoded / sub-delims )
453 *
454 * Returns 0 or the error code
455 */
456static int
457xmlParse3986Host(xmlURIPtr uri, const char **str)
458{
459 const char *cur = *str;
460 const char *host;
461
462 host = cur;
463 /*
464 * IPv6 and future addressing scheme are enclosed between brackets
465 */
466 if (*cur == '[') {
467 cur++;
468 while ((*cur != ']') && (*cur != 0))
469 cur++;
470 if (*cur != ']')
471 return(1);
472 cur++;
473 goto found;
474 }
475 /*
476 * try to parse an IPv4
477 */
478 if (ISA_DIGIT(cur)) {
479 if (xmlParse3986DecOctet(&cur) != 0)
480 goto not_ipv4;
481 if (*cur != '.')
482 goto not_ipv4;
483 cur++;
484 if (xmlParse3986DecOctet(&cur) != 0)
485 goto not_ipv4;
486 if (*cur != '.')
487 goto not_ipv4;
488 if (xmlParse3986DecOctet(&cur) != 0)
489 goto not_ipv4;
490 if (*cur != '.')
491 goto not_ipv4;
492 if (xmlParse3986DecOctet(&cur) != 0)
493 goto not_ipv4;
494 goto found;
495not_ipv4:
496 cur = *str;
497 }
498 /*
499 * then this should be a hostname which can be empty
500 */
501 while (ISA_UNRESERVED(uri, cur) ||
502 ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
503 NEXT(cur);
504found:
505 if (uri != NULL) {
506 if (uri->authority != NULL) xmlFree(uri->authority);
507 uri->authority = NULL;
508 if (uri->server != NULL) xmlFree(uri->server);
509 if (cur != host) {
510 if (uri->cleanup & XML_URI_NO_UNESCAPE)
511 uri->server = STRNDUP(host, cur - host);
512 else
513 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
514 if (uri->server == NULL)
515 return(-1);
516 } else
517 uri->server = NULL;
518 }
519 *str = cur;
520 return(0);
521}
522
523/**
524 * xmlParse3986Authority:
525 * @uri: pointer to an URI structure
526 * @str: the string to analyze
527 *
528 * Parse an authority part and fills in the appropriate fields
529 * of the @uri structure
530 *
531 * authority = [ userinfo "@" ] host [ ":" port ]
532 *
533 * Returns 0 or the error code
534 */
535static int
536xmlParse3986Authority(xmlURIPtr uri, const char **str)
537{
538 const char *cur;
539 int ret;
540
541 cur = *str;
542 /*
543 * try to parse an userinfo and check for the trailing @
544 */
545 ret = xmlParse3986Userinfo(uri, &cur);
546 if (ret < 0)
547 return(ret);
548 if ((ret != 0) || (*cur != '@'))
549 cur = *str;
550 else
551 cur++;
552 ret = xmlParse3986Host(uri, &cur);
553 if (ret != 0) return(ret);
554 if (*cur == ':') {
555 cur++;
556 ret = xmlParse3986Port(uri, &cur);
557 if (ret != 0) return(ret);
558 }
559 *str = cur;
560 return(0);
561}
562
563/**
564 * xmlParse3986Segment:
565 * @str: the string to analyze
566 * @forbid: an optional forbidden character
567 * @empty: allow an empty segment
568 *
569 * Parse a segment and fills in the appropriate fields
570 * of the @uri structure
571 *
572 * segment = *pchar
573 * segment-nz = 1*pchar
574 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
575 * ; non-zero-length segment without any colon ":"
576 *
577 * Returns 0 or the error code
578 */
579static int
580xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
581{
582 const char *cur;
583
584 cur = *str;
585 if (!ISA_PCHAR(uri, cur)) {
586 if (empty)
587 return(0);
588 return(1);
589 }
590 while (ISA_PCHAR(uri, cur) && (*cur != forbid))
591 NEXT(cur);
592 *str = cur;
593 return (0);
594}
595
596/**
597 * xmlParse3986PathAbEmpty:
598 * @uri: pointer to an URI structure
599 * @str: the string to analyze
600 *
601 * Parse an path absolute or empty and fills in the appropriate fields
602 * of the @uri structure
603 *
604 * path-abempty = *( "/" segment )
605 *
606 * Returns 0 or the error code
607 */
608static int
609xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
610{
611 const char *cur;
612 int ret;
613
614 cur = *str;
615
616 while (*cur == '/') {
617 cur++;
618 ret = xmlParse3986Segment(uri, &cur, 0, 1);
619 if (ret != 0) return(ret);
620 }
621 if (uri != NULL) {
622 if (uri->path != NULL) xmlFree(uri->path);
623 if (*str != cur) {
624 if (uri->cleanup & XML_URI_NO_UNESCAPE)
625 uri->path = STRNDUP(*str, cur - *str);
626 else
627 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
628 if (uri->path == NULL)
629 return (-1);
630 } else {
631 uri->path = NULL;
632 }
633 }
634 *str = cur;
635 return (0);
636}
637
638/**
639 * xmlParse3986PathAbsolute:
640 * @uri: pointer to an URI structure
641 * @str: the string to analyze
642 *
643 * Parse an path absolute and fills in the appropriate fields
644 * of the @uri structure
645 *
646 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
647 *
648 * Returns 0 or the error code
649 */
650static int
651xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
652{
653 const char *cur;
654 int ret;
655
656 cur = *str;
657
658 if (*cur != '/')
659 return(1);
660 cur++;
661 ret = xmlParse3986Segment(uri, &cur, 0, 0);
662 if (ret == 0) {
663 while (*cur == '/') {
664 cur++;
665 ret = xmlParse3986Segment(uri, &cur, 0, 1);
666 if (ret != 0) return(ret);
667 }
668 }
669 if (uri != NULL) {
670 if (uri->path != NULL) xmlFree(uri->path);
671 if (cur != *str) {
672 if (uri->cleanup & XML_URI_NO_UNESCAPE)
673 uri->path = STRNDUP(*str, cur - *str);
674 else
675 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
676 if (uri->path == NULL)
677 return (-1);
678 } else {
679 uri->path = NULL;
680 }
681 }
682 *str = cur;
683 return (0);
684}
685
686/**
687 * xmlParse3986PathRootless:
688 * @uri: pointer to an URI structure
689 * @str: the string to analyze
690 *
691 * Parse an path without root and fills in the appropriate fields
692 * of the @uri structure
693 *
694 * path-rootless = segment-nz *( "/" segment )
695 *
696 * Returns 0 or the error code
697 */
698static int
699xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
700{
701 const char *cur;
702 int ret;
703
704 cur = *str;
705
706 ret = xmlParse3986Segment(uri, &cur, 0, 0);
707 if (ret != 0) return(ret);
708 while (*cur == '/') {
709 cur++;
710 ret = xmlParse3986Segment(uri, &cur, 0, 1);
711 if (ret != 0) return(ret);
712 }
713 if (uri != NULL) {
714 if (uri->path != NULL) xmlFree(uri->path);
715 if (cur != *str) {
716 if (uri->cleanup & XML_URI_NO_UNESCAPE)
717 uri->path = STRNDUP(*str, cur - *str);
718 else
719 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
720 if (uri->path == NULL)
721 return (-1);
722 } else {
723 uri->path = NULL;
724 }
725 }
726 *str = cur;
727 return (0);
728}
729
730/**
731 * xmlParse3986PathNoScheme:
732 * @uri: pointer to an URI structure
733 * @str: the string to analyze
734 *
735 * Parse an path which is not a scheme and fills in the appropriate fields
736 * of the @uri structure
737 *
738 * path-noscheme = segment-nz-nc *( "/" segment )
739 *
740 * Returns 0 or the error code
741 */
742static int
743xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
744{
745 const char *cur;
746 int ret;
747
748 cur = *str;
749
750 ret = xmlParse3986Segment(uri, &cur, ':', 0);
751 if (ret != 0) return(ret);
752 while (*cur == '/') {
753 cur++;
754 ret = xmlParse3986Segment(uri, &cur, 0, 1);
755 if (ret != 0) return(ret);
756 }
757 if (uri != NULL) {
758 if (uri->path != NULL) xmlFree(uri->path);
759 if (cur != *str) {
760 if (uri->cleanup & XML_URI_NO_UNESCAPE)
761 uri->path = STRNDUP(*str, cur - *str);
762 else
763 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
764 if (uri->path == NULL)
765 return (-1);
766 } else {
767 uri->path = NULL;
768 }
769 }
770 *str = cur;
771 return (0);
772}
773
774/**
775 * xmlParse3986HierPart:
776 * @uri: pointer to an URI structure
777 * @str: the string to analyze
778 *
779 * Parse an hierarchical part and fills in the appropriate fields
780 * of the @uri structure
781 *
782 * hier-part = "//" authority path-abempty
783 * / path-absolute
784 * / path-rootless
785 * / path-empty
786 *
787 * Returns 0 or the error code
788 */
789static int
790xmlParse3986HierPart(xmlURIPtr uri, const char **str)
791{
792 const char *cur;
793 int ret;
794
795 cur = *str;
796
797 if ((*cur == '/') && (*(cur + 1) == '/')) {
798 cur += 2;
799 ret = xmlParse3986Authority(uri, &cur);
800 if (ret != 0) return(ret);
801 /*
802 * An empty server is marked with a special URI value.
803 */
804 if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
805 uri->port = PORT_EMPTY_SERVER;
806 ret = xmlParse3986PathAbEmpty(uri, &cur);
807 if (ret != 0) return(ret);
808 *str = cur;
809 return(0);
810 } else if (*cur == '/') {
811 ret = xmlParse3986PathAbsolute(uri, &cur);
812 if (ret != 0) return(ret);
813 } else if (ISA_PCHAR(uri, cur)) {
814 ret = xmlParse3986PathRootless(uri, &cur);
815 if (ret != 0) return(ret);
816 } else {
817 /* path-empty is effectively empty */
818 if (uri != NULL) {
819 if (uri->path != NULL) xmlFree(uri->path);
820 uri->path = NULL;
821 }
822 }
823 *str = cur;
824 return (0);
825}
826
827/**
828 * xmlParse3986RelativeRef:
829 * @uri: pointer to an URI structure
830 * @str: the string to analyze
831 *
832 * Parse an URI string and fills in the appropriate fields
833 * of the @uri structure
834 *
835 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
836 * relative-part = "//" authority path-abempty
837 * / path-absolute
838 * / path-noscheme
839 * / path-empty
840 *
841 * Returns 0 or the error code
842 */
843static int
844xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
845 int ret;
846
847 if ((*str == '/') && (*(str + 1) == '/')) {
848 str += 2;
849 ret = xmlParse3986Authority(uri, &str);
850 if (ret != 0) return(ret);
851 ret = xmlParse3986PathAbEmpty(uri, &str);
852 if (ret != 0) return(ret);
853 } else if (*str == '/') {
854 ret = xmlParse3986PathAbsolute(uri, &str);
855 if (ret != 0) return(ret);
856 } else if (ISA_PCHAR(uri, str)) {
857 ret = xmlParse3986PathNoScheme(uri, &str);
858 if (ret != 0) return(ret);
859 } else {
860 /* path-empty is effectively empty */
861 if (uri != NULL) {
862 if (uri->path != NULL) xmlFree(uri->path);
863 uri->path = NULL;
864 }
865 }
866
867 if (*str == '?') {
868 str++;
869 ret = xmlParse3986Query(uri, &str);
870 if (ret != 0) return(ret);
871 }
872 if (*str == '#') {
873 str++;
874 ret = xmlParse3986Fragment(uri, &str);
875 if (ret != 0) return(ret);
876 }
877 if (*str != 0) {
878 xmlCleanURI(uri);
879 return(1);
880 }
881 return(0);
882}
883
884
885/**
886 * xmlParse3986URI:
887 * @uri: pointer to an URI structure
888 * @str: the string to analyze
889 *
890 * Parse an URI string and fills in the appropriate fields
891 * of the @uri structure
892 *
893 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
894 *
895 * Returns 0 or the error code
896 */
897static int
898xmlParse3986URI(xmlURIPtr uri, const char *str) {
899 int ret;
900
901 ret = xmlParse3986Scheme(uri, &str);
902 if (ret != 0) return(ret);
903 if (*str != ':') {
904 return(1);
905 }
906 str++;
907 ret = xmlParse3986HierPart(uri, &str);
908 if (ret != 0) return(ret);
909 if (*str == '?') {
910 str++;
911 ret = xmlParse3986Query(uri, &str);
912 if (ret != 0) return(ret);
913 }
914 if (*str == '#') {
915 str++;
916 ret = xmlParse3986Fragment(uri, &str);
917 if (ret != 0) return(ret);
918 }
919 if (*str != 0) {
920 xmlCleanURI(uri);
921 return(1);
922 }
923 return(0);
924}
925
926/**
927 * xmlParse3986URIReference:
928 * @uri: pointer to an URI structure
929 * @str: the string to analyze
930 *
931 * Parse an URI reference string and fills in the appropriate fields
932 * of the @uri structure
933 *
934 * URI-reference = URI / relative-ref
935 *
936 * Returns 0 or the error code
937 */
938static int
939xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
940 int ret;
941
942 if (str == NULL)
943 return(-1);
944 xmlCleanURI(uri);
945
946 /*
947 * Try first to parse absolute refs, then fallback to relative if
948 * it fails.
949 */
950 ret = xmlParse3986URI(uri, str);
951 if (ret < 0)
952 return(ret);
953 if (ret != 0) {
954 xmlCleanURI(uri);
955 ret = xmlParse3986RelativeRef(uri, str);
956 if (ret != 0) {
957 xmlCleanURI(uri);
958 return(ret);
959 }
960 }
961 return(0);
962}
963
964/**
965 * xmlParseURISafe:
966 * @str: the URI string to analyze
967 * @uriOut: optional pointer to parsed URI
968 *
969 * Parse an URI based on RFC 3986
970 *
971 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
972 *
973 * Available since 2.13.0.
974 *
975 * Returns 0 on success, an error code (typically 1) if the URI is invalid
976 * or -1 if a memory allocation failed.
977 */
978int
979xmlParseURISafe(const char *str, xmlURIPtr *uriOut) {
980 xmlURIPtr uri;
981 int ret;
982
983 if (uriOut == NULL)
984 return(1);
985 *uriOut = NULL;
986 if (str == NULL)
987 return(1);
988
989 uri = xmlCreateURI();
990 if (uri == NULL)
991 return(-1);
992
993 ret = xmlParse3986URIReference(uri, str);
994 if (ret) {
995 xmlFreeURI(uri);
996 return(ret);
997 }
998
999 *uriOut = uri;
1000 return(0);
1001}
1002
1003/**
1004 * xmlParseURI:
1005 * @str: the URI string to analyze
1006 *
1007 * Parse an URI based on RFC 3986
1008 *
1009 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1010 *
1011 * Returns a newly built xmlURIPtr or NULL in case of error
1012 */
1013xmlURIPtr
1014xmlParseURI(const char *str) {
1015 xmlURIPtr uri;
1016 xmlParseURISafe(str, &uri);
1017 return(uri);
1018}
1019
1020/**
1021 * xmlParseURIReference:
1022 * @uri: pointer to an URI structure
1023 * @str: the string to analyze
1024 *
1025 * Parse an URI reference string based on RFC 3986 and fills in the
1026 * appropriate fields of the @uri structure
1027 *
1028 * URI-reference = URI / relative-ref
1029 *
1030 * Returns 0 or the error code
1031 */
1032int
1033xmlParseURIReference(xmlURIPtr uri, const char *str) {
1034 return(xmlParse3986URIReference(uri, str));
1035}
1036
1037/**
1038 * xmlParseURIRaw:
1039 * @str: the URI string to analyze
1040 * @raw: if 1 unescaping of URI pieces are disabled
1041 *
1042 * Parse an URI but allows to keep intact the original fragments.
1043 *
1044 * URI-reference = URI / relative-ref
1045 *
1046 * Returns a newly built xmlURIPtr or NULL in case of error
1047 */
1048xmlURIPtr
1049xmlParseURIRaw(const char *str, int raw) {
1050 xmlURIPtr uri;
1051 int ret;
1052
1053 if (str == NULL)
1054 return(NULL);
1055 uri = xmlCreateURI();
1056 if (uri != NULL) {
1057 if (raw) {
1058 uri->cleanup |= XML_URI_NO_UNESCAPE;
1059 }
1060 ret = xmlParseURIReference(uri, str);
1061 if (ret) {
1062 xmlFreeURI(uri);
1063 return(NULL);
1064 }
1065 }
1066 return(uri);
1067}
1068
1069/************************************************************************
1070 * *
1071 * Generic URI structure functions *
1072 * *
1073 ************************************************************************/
1074
1075/**
1076 * xmlCreateURI:
1077 *
1078 * Simply creates an empty xmlURI
1079 *
1080 * Returns the new structure or NULL in case of error
1081 */
1082xmlURIPtr
1083xmlCreateURI(void) {
1084 xmlURIPtr ret;
1085
1086 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1087 if (ret == NULL)
1088 return(NULL);
1089 memset(ret, 0, sizeof(xmlURI));
1090 ret->port = PORT_EMPTY;
1091 return(ret);
1092}
1093
1094/**
1095 * xmlSaveUriRealloc:
1096 *
1097 * Function to handle properly a reallocation when saving an URI
1098 * Also imposes some limit on the length of an URI string output
1099 */
1100static xmlChar *
1101xmlSaveUriRealloc(xmlChar *ret, int *max) {
1102 xmlChar *temp;
1103 int tmp;
1104
1105 if (*max > MAX_URI_LENGTH)
1106 return(NULL);
1107 tmp = *max * 2;
1108 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1109 if (temp == NULL)
1110 return(NULL);
1111 *max = tmp;
1112 return(temp);
1113}
1114
1115/**
1116 * xmlSaveUri:
1117 * @uri: pointer to an xmlURI
1118 *
1119 * Save the URI as an escaped string
1120 *
1121 * Returns a new string (to be deallocated by caller)
1122 */
1123xmlChar *
1124xmlSaveUri(xmlURIPtr uri) {
1125 xmlChar *ret = NULL;
1126 xmlChar *temp;
1127 const char *p;
1128 int len;
1129 int max;
1130
1131 if (uri == NULL) return(NULL);
1132
1133
1134 max = 80;
1135 ret = (xmlChar *) xmlMallocAtomic(max + 1);
1136 if (ret == NULL)
1137 return(NULL);
1138 len = 0;
1139
1140 if (uri->scheme != NULL) {
1141 p = uri->scheme;
1142 while (*p != 0) {
1143 if (len >= max) {
1144 temp = xmlSaveUriRealloc(ret, &max);
1145 if (temp == NULL) goto mem_error;
1146 ret = temp;
1147 }
1148 ret[len++] = *p++;
1149 }
1150 if (len >= max) {
1151 temp = xmlSaveUriRealloc(ret, &max);
1152 if (temp == NULL) goto mem_error;
1153 ret = temp;
1154 }
1155 ret[len++] = ':';
1156 }
1157 if (uri->opaque != NULL) {
1158 p = uri->opaque;
1159 while (*p != 0) {
1160 if (len + 3 >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1163 ret = temp;
1164 }
1165 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1166 ret[len++] = *p++;
1167 else {
1168 int val = *(unsigned char *)p++;
1169 int hi = val / 0x10, lo = val % 0x10;
1170 ret[len++] = '%';
1171 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1172 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1173 }
1174 }
1175 } else {
1176 if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1177 if (len + 3 >= max) {
1178 temp = xmlSaveUriRealloc(ret, &max);
1179 if (temp == NULL) goto mem_error;
1180 ret = temp;
1181 }
1182 ret[len++] = '/';
1183 ret[len++] = '/';
1184 if (uri->user != NULL) {
1185 p = uri->user;
1186 while (*p != 0) {
1187 if (len + 3 >= max) {
1188 temp = xmlSaveUriRealloc(ret, &max);
1189 if (temp == NULL) goto mem_error;
1190 ret = temp;
1191 }
1192 if ((IS_UNRESERVED(*(p))) ||
1193 ((*(p) == ';')) || ((*(p) == ':')) ||
1194 ((*(p) == '&')) || ((*(p) == '=')) ||
1195 ((*(p) == '+')) || ((*(p) == '$')) ||
1196 ((*(p) == ',')))
1197 ret[len++] = *p++;
1198 else {
1199 int val = *(unsigned char *)p++;
1200 int hi = val / 0x10, lo = val % 0x10;
1201 ret[len++] = '%';
1202 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1203 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1204 }
1205 }
1206 if (len + 3 >= max) {
1207 temp = xmlSaveUriRealloc(ret, &max);
1208 if (temp == NULL) goto mem_error;
1209 ret = temp;
1210 }
1211 ret[len++] = '@';
1212 }
1213 if (uri->server != NULL) {
1214 p = uri->server;
1215 while (*p != 0) {
1216 if (len >= max) {
1217 temp = xmlSaveUriRealloc(ret, &max);
1218 if (temp == NULL) goto mem_error;
1219 ret = temp;
1220 }
1221 /* TODO: escaping? */
1222 ret[len++] = (xmlChar) *p++;
1223 }
1224 }
1225 if (uri->port > 0) {
1226 if (len + 10 >= max) {
1227 temp = xmlSaveUriRealloc(ret, &max);
1228 if (temp == NULL) goto mem_error;
1229 ret = temp;
1230 }
1231 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1232 }
1233 } else if (uri->authority != NULL) {
1234 if (len + 3 >= max) {
1235 temp = xmlSaveUriRealloc(ret, &max);
1236 if (temp == NULL) goto mem_error;
1237 ret = temp;
1238 }
1239 ret[len++] = '/';
1240 ret[len++] = '/';
1241 p = uri->authority;
1242 while (*p != 0) {
1243 if (len + 3 >= max) {
1244 temp = xmlSaveUriRealloc(ret, &max);
1245 if (temp == NULL) goto mem_error;
1246 ret = temp;
1247 }
1248 if ((IS_UNRESERVED(*(p))) ||
1249 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1250 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1251 ((*(p) == '=')) || ((*(p) == '+')))
1252 ret[len++] = *p++;
1253 else {
1254 int val = *(unsigned char *)p++;
1255 int hi = val / 0x10, lo = val % 0x10;
1256 ret[len++] = '%';
1257 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1258 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1259 }
1260 }
1261 } else if (uri->scheme != NULL) {
1262 if (len + 3 >= max) {
1263 temp = xmlSaveUriRealloc(ret, &max);
1264 if (temp == NULL) goto mem_error;
1265 ret = temp;
1266 }
1267 }
1268 if (uri->path != NULL) {
1269 p = uri->path;
1270 /*
1271 * the colon in file:///d: should not be escaped or
1272 * Windows accesses fail later.
1273 */
1274 if ((uri->scheme != NULL) &&
1275 (p[0] == '/') &&
1276 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1277 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1278 (p[2] == ':') &&
1279 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1280 if (len + 3 >= max) {
1281 temp = xmlSaveUriRealloc(ret, &max);
1282 if (temp == NULL) goto mem_error;
1283 ret = temp;
1284 }
1285 ret[len++] = *p++;
1286 ret[len++] = *p++;
1287 ret[len++] = *p++;
1288 }
1289 while (*p != 0) {
1290 if (len + 3 >= max) {
1291 temp = xmlSaveUriRealloc(ret, &max);
1292 if (temp == NULL) goto mem_error;
1293 ret = temp;
1294 }
1295 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1296 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1297 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1298 ((*(p) == ',')))
1299 ret[len++] = *p++;
1300 else {
1301 int val = *(unsigned char *)p++;
1302 int hi = val / 0x10, lo = val % 0x10;
1303 ret[len++] = '%';
1304 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1305 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1306 }
1307 }
1308 }
1309 if (uri->query_raw != NULL) {
1310 if (len + 1 >= max) {
1311 temp = xmlSaveUriRealloc(ret, &max);
1312 if (temp == NULL) goto mem_error;
1313 ret = temp;
1314 }
1315 ret[len++] = '?';
1316 p = uri->query_raw;
1317 while (*p != 0) {
1318 if (len + 1 >= max) {
1319 temp = xmlSaveUriRealloc(ret, &max);
1320 if (temp == NULL) goto mem_error;
1321 ret = temp;
1322 }
1323 ret[len++] = *p++;
1324 }
1325 } else if (uri->query != NULL) {
1326 if (len + 3 >= max) {
1327 temp = xmlSaveUriRealloc(ret, &max);
1328 if (temp == NULL) goto mem_error;
1329 ret = temp;
1330 }
1331 ret[len++] = '?';
1332 p = uri->query;
1333 while (*p != 0) {
1334 if (len + 3 >= max) {
1335 temp = xmlSaveUriRealloc(ret, &max);
1336 if (temp == NULL) goto mem_error;
1337 ret = temp;
1338 }
1339 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1340 ret[len++] = *p++;
1341 else {
1342 int val = *(unsigned char *)p++;
1343 int hi = val / 0x10, lo = val % 0x10;
1344 ret[len++] = '%';
1345 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1346 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1347 }
1348 }
1349 }
1350 }
1351 if (uri->fragment != NULL) {
1352 if (len + 3 >= max) {
1353 temp = xmlSaveUriRealloc(ret, &max);
1354 if (temp == NULL) goto mem_error;
1355 ret = temp;
1356 }
1357 ret[len++] = '#';
1358 p = uri->fragment;
1359 while (*p != 0) {
1360 if (len + 3 >= max) {
1361 temp = xmlSaveUriRealloc(ret, &max);
1362 if (temp == NULL) goto mem_error;
1363 ret = temp;
1364 }
1365 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1366 ret[len++] = *p++;
1367 else {
1368 int val = *(unsigned char *)p++;
1369 int hi = val / 0x10, lo = val % 0x10;
1370 ret[len++] = '%';
1371 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1372 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1373 }
1374 }
1375 }
1376 if (len >= max) {
1377 temp = xmlSaveUriRealloc(ret, &max);
1378 if (temp == NULL) goto mem_error;
1379 ret = temp;
1380 }
1381 ret[len] = 0;
1382 return(ret);
1383
1384mem_error:
1385 xmlFree(ret);
1386 return(NULL);
1387}
1388
1389/**
1390 * xmlPrintURI:
1391 * @stream: a FILE* for the output
1392 * @uri: pointer to an xmlURI
1393 *
1394 * Prints the URI in the stream @stream.
1395 */
1396void
1397xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1398 xmlChar *out;
1399
1400 out = xmlSaveUri(uri);
1401 if (out != NULL) {
1402 fprintf(stream, "%s", (char *) out);
1403 xmlFree(out);
1404 }
1405}
1406
1407/**
1408 * xmlCleanURI:
1409 * @uri: pointer to an xmlURI
1410 *
1411 * Make sure the xmlURI struct is free of content
1412 */
1413static void
1414xmlCleanURI(xmlURIPtr uri) {
1415 if (uri == NULL) return;
1416
1417 if (uri->scheme != NULL) xmlFree(uri->scheme);
1418 uri->scheme = NULL;
1419 if (uri->server != NULL) xmlFree(uri->server);
1420 uri->server = NULL;
1421 if (uri->user != NULL) xmlFree(uri->user);
1422 uri->user = NULL;
1423 if (uri->path != NULL) xmlFree(uri->path);
1424 uri->path = NULL;
1425 if (uri->fragment != NULL) xmlFree(uri->fragment);
1426 uri->fragment = NULL;
1427 if (uri->opaque != NULL) xmlFree(uri->opaque);
1428 uri->opaque = NULL;
1429 if (uri->authority != NULL) xmlFree(uri->authority);
1430 uri->authority = NULL;
1431 if (uri->query != NULL) xmlFree(uri->query);
1432 uri->query = NULL;
1433 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1434 uri->query_raw = NULL;
1435}
1436
1437/**
1438 * xmlFreeURI:
1439 * @uri: pointer to an xmlURI
1440 *
1441 * Free up the xmlURI struct
1442 */
1443void
1444xmlFreeURI(xmlURIPtr uri) {
1445 if (uri == NULL) return;
1446
1447 if (uri->scheme != NULL) xmlFree(uri->scheme);
1448 if (uri->server != NULL) xmlFree(uri->server);
1449 if (uri->user != NULL) xmlFree(uri->user);
1450 if (uri->path != NULL) xmlFree(uri->path);
1451 if (uri->fragment != NULL) xmlFree(uri->fragment);
1452 if (uri->opaque != NULL) xmlFree(uri->opaque);
1453 if (uri->authority != NULL) xmlFree(uri->authority);
1454 if (uri->query != NULL) xmlFree(uri->query);
1455 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1456 xmlFree(uri);
1457}
1458
1459/************************************************************************
1460 * *
1461 * Helper functions *
1462 * *
1463 ************************************************************************/
1464
/*
 * xmlIsPathSeparator:
 * @c:  the character to test
 * @isFile:  true for filesystem paths, false for URIs
 *
 * '/' is always a separator. When built for Windows or Cygwin, a
 * backslash is a separator as well, but only for filesystem paths.
 *
 * Returns 1 if @c separates path segments, 0 otherwise.
 */
static int
xmlIsPathSeparator(int c, int isFile) {
    int isSeparator = (c == '/');

#if defined(_WIN32) || defined(__CYGWIN__)
    if ((c == '\\') && isFile)
        isSeparator = 1;
#else
    /* @isFile only influences the result on Windows-like builds */
    (void) isFile;
#endif

    return(isSeparator);
}
1479
1480/**
1481 * xmlNormalizePath:
1482 * @path: pointer to the path string
1483 * @isFile: true for filesystem paths, false for URIs
1484 *
1485 * Normalize a filesystem path or URI.
1486 *
1487 * Returns 0 or an error code
1488 */
1489static int
1490xmlNormalizePath(char *path, int isFile) {
1491 char *cur, *out;
1492 int numSeg = 0;
1493
1494 if (path == NULL)
1495 return(-1);
1496
1497 cur = path;
1498 out = path;
1499
1500 if (*cur == 0)
1501 return(0);
1502
1503 if (xmlIsPathSeparator(*cur, isFile)) {
1504 cur++;
1505 *out++ = '/';
1506 }
1507
1508 while (*cur != 0) {
1509 /*
1510 * At this point, out is either empty or ends with a separator.
1511 * Collapse multiple separators first.
1512 */
1513 while (xmlIsPathSeparator(*cur, isFile)) {
1514#if defined(_WIN32) || defined(__CYGWIN__)
1515 /* Allow two separators at start of path */
1516 if ((isFile) && (out == path + 1))
1517 *out++ = '/';
1518#endif
1519 cur++;
1520 }
1521
1522 if (*cur == '.') {
1523 if (cur[1] == 0) {
1524 /* Ignore "." at end of path */
1525 break;
1526 } else if (xmlIsPathSeparator(cur[1], isFile)) {
1527 /* Skip "./" */
1528 cur += 2;
1529 continue;
1530 } else if ((cur[1] == '.') &&
1531 ((cur[2] == 0) || xmlIsPathSeparator(cur[2], isFile))) {
1532 if (numSeg > 0) {
1533 /* Handle ".." by removing last segment */
1534 do {
1535 out--;
1536 } while ((out > path) &&
1537 !xmlIsPathSeparator(out[-1], isFile));
1538 numSeg--;
1539
1540 if (cur[2] == 0)
1541 break;
1542 cur += 3;
1543 continue;
1544 } else if (out[0] == '/') {
1545 /* Ignore extraneous ".." in absolute paths */
1546 if (cur[2] == 0)
1547 break;
1548 cur += 3;
1549 continue;
1550 } else {
1551 /* Keep "../" at start of relative path */
1552 numSeg--;
1553 }
1554 }
1555 }
1556
1557 /* Copy segment */
1558 while ((*cur != 0) && !xmlIsPathSeparator(*cur, isFile)) {
1559 *out++ = *cur++;
1560 }
1561
1562 /* Copy separator */
1563 if (*cur != 0) {
1564 cur++;
1565 *out++ = '/';
1566 }
1567
1568 numSeg++;
1569 }
1570
1571 /* Keep "." if output is empty and it's a file */
1572 if ((isFile) && (out <= path))
1573 *out++ = '.';
1574 *out = 0;
1575
1576 return(0);
1577}
1578
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string, that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * The string is rewritten in place by xmlNormalizePath() in URI mode;
 * no new allocation is done.
 *
 * Returns 0 or an error code
 */
int
xmlNormalizeURIPath(char *path) {
    /* treat @path as a URI path, not as a filesystem path */
    const int isFile = 0;

    return(xmlNormalizePath(path, isFile));
}
1594
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1602
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is a URI.
 * Each well-formed %XX sequence is replaced by the single byte it
 * encodes (no character encoding conversion is applied); every other
 * byte, including a '%' that is not followed by two hex digits, is
 * copied unchanged. The result is therefore never longer than the
 * input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result; otherwise the result is written to @target, which must be
 * large enough.
 *
 * Returns the zero-terminated unescaped string (either @target or the
 * new allocation), or NULL in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    if (target != NULL) {
        result = target;
    } else {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL)
            return(NULL);
    }

    src = str;
    dst = result;
    while (len > 0) {
        /* An escape needs three remaining bytes: '%' and two hex digits */
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }

            src += 3;
            len -= 3;
            /* Explicit narrowing: value is 0..255, char may be signed */
            *dst++ = (char) value;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;

    return(result);
}
1664
1665/**
1666 * xmlURIEscapeStr:
1667 * @str: string to escape
1668 * @list: exception list string of chars not to escape
1669 *
1670 * This routine escapes a string to hex, ignoring unreserved characters
1671 * a-z, A-Z, 0-9, "-._~", a few sub-delims "!*'()", the gen-delim "@"
1672 * (why?) and the characters in the exception list.
1673 *
1674 * Returns a new escaped string or NULL in case of error.
1675 */
1676xmlChar *
1677xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1678 xmlChar *ret, ch;
1679 xmlChar *temp;
1680 const xmlChar *in;
1681 int len, out;
1682
1683 if (str == NULL)
1684 return(NULL);
1685 if (str[0] == 0)
1686 return(xmlStrdup(str));
1687 len = xmlStrlen(str);
1688
1689 len += 20;
1690 ret = (xmlChar *) xmlMallocAtomic(len);
1691 if (ret == NULL)
1692 return(NULL);
1693 in = (const xmlChar *) str;
1694 out = 0;
1695 while(*in != 0) {
1696 if (len - out <= 3) {
1697 if (len > INT_MAX / 2)
1698 return(NULL);
1699 temp = xmlRealloc(ret, len * 2);
1700 if (temp == NULL) {
1701 xmlFree(ret);
1702 return(NULL);
1703 }
1704 ret = temp;
1705 len *= 2;
1706 }
1707
1708 ch = *in;
1709
1710 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1711 unsigned char val;
1712 ret[out++] = '%';
1713 val = ch >> 4;
1714 if (val <= 9)
1715 ret[out++] = '0' + val;
1716 else
1717 ret[out++] = 'A' + val - 0xA;
1718 val = ch & 0xF;
1719 if (val <= 9)
1720 ret[out++] = '0' + val;
1721 else
1722 ret[out++] = 'A' + val - 0xA;
1723 in++;
1724 } else {
1725 ret[out++] = *in++;
1726 }
1727
1728 }
1729 ret[out] = 0;
1730 return(ret);
1731}
1732
1733/**
1734 * xmlURIEscape:
1735 * @str: the string of the URI to escape
1736 *
1737 * Escaping routine, does not do validity checks !
1738 * It will try to escape the chars needing this, but this is heuristic
1739 * based it's impossible to be sure.
1740 *
1741 * Returns an copy of the string, but escaped
1742 *
1743 * 25 May 2001
1744 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1745 * according to RFC2396.
1746 * - Carl Douglas
1747 */
1748xmlChar *
1749xmlURIEscape(const xmlChar * str)
1750{
1751 xmlChar *ret, *segment = NULL;
1752 xmlURIPtr uri;
1753 int ret2;
1754
1755 if (str == NULL)
1756 return (NULL);
1757
1758 uri = xmlCreateURI();
1759 if (uri != NULL) {
1760 /*
1761 * Allow escaping errors in the unescaped form
1762 */
1763 uri->cleanup = XML_URI_ALLOW_UNWISE;
1764 ret2 = xmlParseURIReference(uri, (const char *)str);
1765 if (ret2) {
1766 xmlFreeURI(uri);
1767 return (NULL);
1768 }
1769 }
1770
1771 if (!uri)
1772 return NULL;
1773
1774 ret = NULL;
1775
1776#define NULLCHK(p) if(!p) { \
1777 xmlFreeURI(uri); \
1778 xmlFree(ret); \
1779 return NULL; } \
1780
1781 if (uri->scheme) {
1782 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1783 NULLCHK(segment)
1784 ret = xmlStrcat(ret, segment);
1785 ret = xmlStrcat(ret, BAD_CAST ":");
1786 xmlFree(segment);
1787 }
1788
1789 if (uri->authority) {
1790 segment =
1791 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1792 NULLCHK(segment)
1793 ret = xmlStrcat(ret, BAD_CAST "//");
1794 ret = xmlStrcat(ret, segment);
1795 xmlFree(segment);
1796 }
1797
1798 if (uri->user) {
1799 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1800 NULLCHK(segment)
1801 ret = xmlStrcat(ret,BAD_CAST "//");
1802 ret = xmlStrcat(ret, segment);
1803 ret = xmlStrcat(ret, BAD_CAST "@");
1804 xmlFree(segment);
1805 }
1806
1807 if (uri->server) {
1808 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1809 NULLCHK(segment)
1810 if (uri->user == NULL)
1811 ret = xmlStrcat(ret, BAD_CAST "//");
1812 ret = xmlStrcat(ret, segment);
1813 xmlFree(segment);
1814 }
1815
1816 if (uri->port > 0) {
1817 xmlChar port[11];
1818
1819 snprintf((char *) port, 11, "%d", uri->port);
1820 ret = xmlStrcat(ret, BAD_CAST ":");
1821 ret = xmlStrcat(ret, port);
1822 }
1823
1824 if (uri->path) {
1825 segment =
1826 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1827 NULLCHK(segment)
1828 ret = xmlStrcat(ret, segment);
1829 xmlFree(segment);
1830 }
1831
1832 if (uri->query_raw) {
1833 ret = xmlStrcat(ret, BAD_CAST "?");
1834 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1835 }
1836 else if (uri->query) {
1837 segment =
1838 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1839 NULLCHK(segment)
1840 ret = xmlStrcat(ret, BAD_CAST "?");
1841 ret = xmlStrcat(ret, segment);
1842 xmlFree(segment);
1843 }
1844
1845 if (uri->opaque) {
1846 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1847 NULLCHK(segment)
1848 ret = xmlStrcat(ret, segment);
1849 xmlFree(segment);
1850 }
1851
1852 if (uri->fragment) {
1853 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1854 NULLCHK(segment)
1855 ret = xmlStrcat(ret, BAD_CAST "#");
1856 ret = xmlStrcat(ret, segment);
1857 xmlFree(segment);
1858 }
1859
1860 xmlFreeURI(uri);
1861#undef NULLCHK
1862
1863 return (ret);
1864}
1865
1866/************************************************************************
1867 * *
1868 * Public functions *
1869 * *
1870 ************************************************************************/
1871
1872static int
1873xmlIsAbsolutePath(const xmlChar *path) {
1874 int c = path[0];
1875
1876 if (xmlIsPathSeparator(c, 1))
1877 return(1);
1878
1879#if defined(_WIN32) || defined(__CYGWIN__)
1880 if ((((c >= 'A') && (c <= 'Z')) ||
1881 ((c >= 'a') && (c <= 'z'))) &&
1882 (path[1] == ':'))
1883 return(1);
1884#endif
1885
1886 return(0);
1887}
1888
1889/**
1890 * xmlResolvePath:
1891 * @ref: the filesystem path
1892 * @base: the base value
1893 * @out: pointer to result URI
1894 *
1895 * Resolves a filesystem path from a base path.
1896 *
1897 * Returns 0 on success, -1 if a memory allocation failed or an error
1898 * code if URI or base are invalid.
1899 */
1900static int
1901xmlResolvePath(const xmlChar *escRef, const xmlChar *base, xmlChar **out) {
1902 const xmlChar *fragment;
1903 xmlChar *tmp = NULL;
1904 xmlChar *ref = NULL;
1905 xmlChar *result = NULL;
1906 int ret = -1;
1907 int i;
1908
1909 if (out == NULL)
1910 return(1);
1911 *out = NULL;
1912
1913 if ((escRef == NULL) || (escRef[0] == 0)) {
1914 if ((base == NULL) || (base[0] == 0))
1915 return(1);
1916 ref = xmlStrdup(base);
1917 if (ref == NULL)
1918 goto err_memory;
1919 *out = ref;
1920 return(0);
1921 }
1922
1923 /*
1924 * If a URI is resolved, we can assume it is a valid URI and not
1925 * a filesystem path. This means we have to unescape the part
1926 * before the fragment.
1927 */
1928 fragment = xmlStrchr(escRef, '#');
1929 if (fragment != NULL) {
1930 tmp = xmlStrndup(escRef, fragment - escRef);
1931 if (tmp == NULL)
1932 goto err_memory;
1933 escRef = tmp;
1934 }
1935
1936 ref = (xmlChar *) xmlURIUnescapeString((char *) escRef, -1, NULL);
1937 if (ref == NULL)
1938 goto err_memory;
1939
1940 if ((base == NULL) || (base[0] == 0))
1941 goto done;
1942
1943 if (xmlIsAbsolutePath(ref))
1944 goto done;
1945
1946 /*
1947 * Remove last segment from base
1948 */
1949 i = xmlStrlen(base);
1950 while ((i > 0) && !xmlIsPathSeparator(base[i-1], 1))
1951 i--;
1952
1953 /*
1954 * Concatenate base and ref
1955 */
1956 if (i > 0) {
1957 int refLen = xmlStrlen(ref);
1958
1959 result = xmlMalloc(i + refLen + 1);
1960 if (result == NULL)
1961 goto err_memory;
1962
1963 memcpy(result, base, i);
1964 memcpy(result + i, ref, refLen + 1);
1965 }
1966
1967 /*
1968 * Normalize
1969 */
1970 xmlNormalizePath((char *) result, 1);
1971
1972done:
1973 if (result == NULL) {
1974 result = ref;
1975 ref = NULL;
1976 }
1977
1978 if (fragment != NULL) {
1979 result = xmlStrcat(result, fragment);
1980 if (result == NULL)
1981 goto err_memory;
1982 }
1983
1984 *out = result;
1985 ret = 0;
1986
1987err_memory:
1988 xmlFree(tmp);
1989 xmlFree(ref);
1990 return(ret);
1991}
1992
1993/**
1994 * xmlBuildURISafe:
1995 * @URI: the URI instance found in the document
1996 * @base: the base value
1997 * @valPtr: pointer to result URI
1998 *
1999 * Computes he final URI of the reference done by checking that
2000 * the given URI is valid, and building the final URI using the
2001 * base URI. This is processed according to section 5.2 of the
2002 * RFC 2396
2003 *
2004 * 5.2. Resolving Relative References to Absolute Form
2005 *
2006 * Available since 2.13.0.
2007 *
2008 * Returns 0 on success, -1 if a memory allocation failed or an error
2009 * code if URI or base are invalid.
2010 */
2011int
2012xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
2013 xmlChar *val = NULL;
2014 int ret, len, indx, cur, out;
2015 xmlURIPtr ref = NULL;
2016 xmlURIPtr bas = NULL;
2017 xmlURIPtr res = NULL;
2018
2019 if (valPtr == NULL)
2020 return(1);
2021 *valPtr = NULL;
2022
2023 if (URI == NULL)
2024 return(1);
2025
2026 if (base == NULL) {
2027 val = xmlStrdup(URI);
2028 if (val == NULL)
2029 return(-1);
2030 *valPtr = val;
2031 return(0);
2032 }
2033
2034 /*
2035 * 1) The URI reference is parsed into the potential four components and
2036 * fragment identifier, as described in Section 4.3.
2037 *
2038 * NOTE that a completely empty URI is treated by modern browsers
2039 * as a reference to "." rather than as a synonym for the current
2040 * URI. Should we do that here?
2041 */
2042 if (URI[0] != 0)
2043 ret = xmlParseURISafe((const char *) URI, &ref);
2044 else
2045 ret = 0;
2046 if (ret != 0)
2047 goto done;
2048 if ((ref != NULL) && (ref->scheme != NULL)) {
2049 /*
2050 * The URI is absolute don't modify.
2051 */
2052 val = xmlStrdup(URI);
2053 if (val == NULL)
2054 ret = -1;
2055 goto done;
2056 }
2057
2058 /*
2059 * If base has no scheme or authority, it is assumed to be a
2060 * filesystem path.
2061 */
2062 if (xmlStrstr(base, BAD_CAST "://") == NULL) {
2063 xmlFreeURI(ref);
2064 return(xmlResolvePath(URI, base, valPtr));
2065 }
2066
2067 ret = xmlParseURISafe((const char *) base, &bas);
2068 if (ret < 0)
2069 goto done;
2070 if (ret != 0) {
2071 if (ref) {
2072 ret = 0;
2073 val = xmlSaveUri(ref);
2074 if (val == NULL)
2075 ret = -1;
2076 }
2077 goto done;
2078 }
2079 if (ref == NULL) {
2080 /*
2081 * the base fragment must be ignored
2082 */
2083 if (bas->fragment != NULL) {
2084 xmlFree(bas->fragment);
2085 bas->fragment = NULL;
2086 }
2087 val = xmlSaveUri(bas);
2088 if (val == NULL)
2089 ret = -1;
2090 goto done;
2091 }
2092
2093 /*
2094 * 2) If the path component is empty and the scheme, authority, and
2095 * query components are undefined, then it is a reference to the
2096 * current document and we are done. Otherwise, the reference URI's
2097 * query and fragment components are defined as found (or not found)
2098 * within the URI reference and not inherited from the base URI.
2099 *
2100 * NOTE that in modern browsers, the parsing differs from the above
2101 * in the following aspect: the query component is allowed to be
2102 * defined while still treating this as a reference to the current
2103 * document.
2104 */
2105 ret = -1;
2106 res = xmlCreateURI();
2107 if (res == NULL)
2108 goto done;
2109 if ((ref->scheme == NULL) && (ref->path == NULL) &&
2110 ((ref->authority == NULL) && (ref->server == NULL) &&
2111 (ref->port == PORT_EMPTY))) {
2112 if (bas->scheme != NULL) {
2113 res->scheme = xmlMemStrdup(bas->scheme);
2114 if (res->scheme == NULL)
2115 goto done;
2116 }
2117 if (bas->authority != NULL) {
2118 res->authority = xmlMemStrdup(bas->authority);
2119 if (res->authority == NULL)
2120 goto done;
2121 } else {
2122 if (bas->server != NULL) {
2123 res->server = xmlMemStrdup(bas->server);
2124 if (res->server == NULL)
2125 goto done;
2126 }
2127 if (bas->user != NULL) {
2128 res->user = xmlMemStrdup(bas->user);
2129 if (res->user == NULL)
2130 goto done;
2131 }
2132 res->port = bas->port;
2133 }
2134 if (bas->path != NULL) {
2135 res->path = xmlMemStrdup(bas->path);
2136 if (res->path == NULL)
2137 goto done;
2138 }
2139 if (ref->query_raw != NULL) {
2140 res->query_raw = xmlMemStrdup (ref->query_raw);
2141 if (res->query_raw == NULL)
2142 goto done;
2143 } else if (ref->query != NULL) {
2144 res->query = xmlMemStrdup(ref->query);
2145 if (res->query == NULL)
2146 goto done;
2147 } else if (bas->query_raw != NULL) {
2148 res->query_raw = xmlMemStrdup(bas->query_raw);
2149 if (res->query_raw == NULL)
2150 goto done;
2151 } else if (bas->query != NULL) {
2152 res->query = xmlMemStrdup(bas->query);
2153 if (res->query == NULL)
2154 goto done;
2155 }
2156 if (ref->fragment != NULL) {
2157 res->fragment = xmlMemStrdup(ref->fragment);
2158 if (res->fragment == NULL)
2159 goto done;
2160 }
2161 goto step_7;
2162 }
2163
2164 /*
2165 * 3) If the scheme component is defined, indicating that the reference
2166 * starts with a scheme name, then the reference is interpreted as an
2167 * absolute URI and we are done. Otherwise, the reference URI's
2168 * scheme is inherited from the base URI's scheme component.
2169 */
2170 if (ref->scheme != NULL) {
2171 val = xmlSaveUri(ref);
2172 if (val != NULL)
2173 ret = 0;
2174 goto done;
2175 }
2176 if (bas->scheme != NULL) {
2177 res->scheme = xmlMemStrdup(bas->scheme);
2178 if (res->scheme == NULL)
2179 goto done;
2180 }
2181
2182 if (ref->query_raw != NULL) {
2183 res->query_raw = xmlMemStrdup(ref->query_raw);
2184 if (res->query_raw == NULL)
2185 goto done;
2186 } else if (ref->query != NULL) {
2187 res->query = xmlMemStrdup(ref->query);
2188 if (res->query == NULL)
2189 goto done;
2190 }
2191 if (ref->fragment != NULL) {
2192 res->fragment = xmlMemStrdup(ref->fragment);
2193 if (res->fragment == NULL)
2194 goto done;
2195 }
2196
2197 /*
2198 * 4) If the authority component is defined, then the reference is a
2199 * network-path and we skip to step 7. Otherwise, the reference
2200 * URI's authority is inherited from the base URI's authority
2201 * component, which will also be undefined if the URI scheme does not
2202 * use an authority component.
2203 */
2204 if ((ref->authority != NULL) || (ref->server != NULL) ||
2205 (ref->port != PORT_EMPTY)) {
2206 if (ref->authority != NULL) {
2207 res->authority = xmlMemStrdup(ref->authority);
2208 if (res->authority == NULL)
2209 goto done;
2210 } else {
2211 if (ref->server != NULL) {
2212 res->server = xmlMemStrdup(ref->server);
2213 if (res->server == NULL)
2214 goto done;
2215 }
2216 if (ref->user != NULL) {
2217 res->user = xmlMemStrdup(ref->user);
2218 if (res->user == NULL)
2219 goto done;
2220 }
2221 res->port = ref->port;
2222 }
2223 if (ref->path != NULL) {
2224 res->path = xmlMemStrdup(ref->path);
2225 if (res->path == NULL)
2226 goto done;
2227 }
2228 goto step_7;
2229 }
2230 if (bas->authority != NULL) {
2231 res->authority = xmlMemStrdup(bas->authority);
2232 if (res->authority == NULL)
2233 goto done;
2234 } else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2235 if (bas->server != NULL) {
2236 res->server = xmlMemStrdup(bas->server);
2237 if (res->server == NULL)
2238 goto done;
2239 }
2240 if (bas->user != NULL) {
2241 res->user = xmlMemStrdup(bas->user);
2242 if (res->user == NULL)
2243 goto done;
2244 }
2245 res->port = bas->port;
2246 }
2247
2248 /*
2249 * 5) If the path component begins with a slash character ("/"), then
2250 * the reference is an absolute-path and we skip to step 7.
2251 */
2252 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2253 res->path = xmlMemStrdup(ref->path);
2254 if (res->path == NULL)
2255 goto done;
2256 goto step_7;
2257 }
2258
2259
2260 /*
2261 * 6) If this step is reached, then we are resolving a relative-path
2262 * reference. The relative path needs to be merged with the base
2263 * URI's path. Although there are many ways to do this, we will
2264 * describe a simple method using a separate string buffer.
2265 *
2266 * Allocate a buffer large enough for the result string.
2267 */
2268 len = 2; /* extra / and 0 */
2269 if (ref->path != NULL)
2270 len += strlen(ref->path);
2271 if (bas->path != NULL)
2272 len += strlen(bas->path);
2273 res->path = (char *) xmlMallocAtomic(len);
2274 if (res->path == NULL)
2275 goto done;
2276 res->path[0] = 0;
2277
2278 /*
2279 * a) All but the last segment of the base URI's path component is
2280 * copied to the buffer. In other words, any characters after the
2281 * last (right-most) slash character, if any, are excluded.
2282 */
2283 cur = 0;
2284 out = 0;
2285 if (bas->path != NULL) {
2286 while (bas->path[cur] != 0) {
2287 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2288 cur++;
2289 if (bas->path[cur] == 0)
2290 break;
2291
2292 cur++;
2293 while (out < cur) {
2294 res->path[out] = bas->path[out];
2295 out++;
2296 }
2297 }
2298 }
2299 res->path[out] = 0;
2300
2301 /*
2302 * b) The reference's path component is appended to the buffer
2303 * string.
2304 */
2305 if (ref->path != NULL && ref->path[0] != 0) {
2306 indx = 0;
2307 /*
2308 * Ensure the path includes a '/'
2309 */
2310 if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2311 res->path[out++] = '/';
2312 while (ref->path[indx] != 0) {
2313 res->path[out++] = ref->path[indx++];
2314 }
2315 }
2316 res->path[out] = 0;
2317
2318 /*
2319 * Steps c) to h) are really path normalization steps
2320 */
2321 xmlNormalizeURIPath(res->path);
2322
2323step_7:
2324
2325 /*
2326 * 7) The resulting URI components, including any inherited from the
2327 * base URI, are recombined to give the absolute form of the URI
2328 * reference.
2329 */
2330 val = xmlSaveUri(res);
2331 if (val != NULL)
2332 ret = 0;
2333
2334done:
2335 if (ref != NULL)
2336 xmlFreeURI(ref);
2337 if (bas != NULL)
2338 xmlFreeURI(bas);
2339 if (res != NULL)
2340 xmlFreeURI(res);
2341 *valPtr = val;
2342 return(ret);
2343}
2344
2345/**
2346 * xmlBuildURI:
2347 * @URI: the URI instance found in the document
2348 * @base: the base value
2349 *
2350 * Computes he final URI of the reference done by checking that
2351 * the given URI is valid, and building the final URI using the
2352 * base URI. This is processed according to section 5.2 of the
2353 * RFC 2396
2354 *
2355 * 5.2. Resolving Relative References to Absolute Form
2356 *
2357 * Returns a new URI string (to be freed by the caller) or NULL in case
2358 * of error.
2359 */
2360xmlChar *
2361xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
2362 xmlChar *out;
2363
2364 xmlBuildURISafe(URI, base, &out);
2365 return(out);
2366}
2367
2368static int
2369xmlParseUriOrPath(const char *str, xmlURIPtr *out, int *drive) {
2370 xmlURIPtr uri;
2371 char *buf = NULL;
2372 int ret;
2373
2374 *out = NULL;
2375 *drive = 0;
2376
2377 uri = xmlCreateURI();
2378 if (uri == NULL) {
2379 ret = -1;
2380 goto done;
2381 }
2382
2383 if (xmlStrstr(BAD_CAST str, BAD_CAST "://") == NULL) {
2384 const char *path;
2385 size_t pathSize;
2386 int prependSlash = 0;
2387
2388 buf = xmlMemStrdup(str);
2389 if (buf == NULL) {
2390 ret = -1;
2391 goto done;
2392 }
2393 xmlNormalizePath(buf, /* isFile */ 1);
2394
2395 path = buf;
2396
2397 if (xmlIsAbsolutePath(BAD_CAST buf)) {
2398#if defined(_WIN32) || defined(__CYGWIN__)
2399 const char *server = NULL;
2400 int isFileScheme = 0;
2401#endif
2402
2403#if defined(_WIN32) || defined(__CYGWIN__)
2404 if (strncmp(buf, "//?/UNC/", 8) == 0) {
2405 server = buf + 8;
2406 isFileScheme = 1;
2407 } else if (strncmp(buf, "//?/", 4) == 0) {
2408 path = buf + 3;
2409 isFileScheme = 1;
2410 } else if (strncmp(buf, "//", 2) == 0) {
2411 server = buf + 2;
2412 isFileScheme = 1;
2413 }
2414
2415 if (server != NULL) {
2416 const char *end = strchr(server, '/');
2417
2418 if (end == NULL) {
2419 uri->server = xmlMemStrdup(server);
2420 path = "/";
2421 } else {
2422 uri->server = (char *) xmlStrndup(BAD_CAST server,
2423 end - server);
2424 path = end;
2425 }
2426 if (uri->server == NULL) {
2427 ret = -1;
2428 goto done;
2429 }
2430 }
2431
2432 if ((((path[0] >= 'A') && (path[0] <= 'Z')) ||
2433 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
2434 (path[1] == ':')) {
2435 prependSlash = 1;
2436 isFileScheme = 1;
2437 }
2438
2439 if (isFileScheme) {
2440 uri->scheme = xmlMemStrdup("file");
2441 if (uri->scheme == NULL) {
2442 ret = -1;
2443 goto done;
2444 }
2445
2446 if (uri->server == NULL)
2447 uri->port = PORT_EMPTY_SERVER;
2448 }
2449#endif
2450 }
2451
2452 pathSize = strlen(path);
2453 uri->path = xmlMalloc(pathSize + prependSlash + 1);
2454 if (uri->path == NULL) {
2455 ret = -1;
2456 goto done;
2457 }
2458 if (prependSlash) {
2459 uri->path[0] = '/';
2460 memcpy(uri->path + 1, path, pathSize + 1);
2461 } else {
2462 memcpy(uri->path, path, pathSize + 1);
2463 }
2464 } else {
2465 ret = xmlParseURIReference(uri, str);
2466 if (ret != 0)
2467 goto done;
2468
2469 xmlNormalizePath(uri->path, /* isFile */ 0);
2470 }
2471
2472#if defined(_WIN32) || defined(__CYGWIN__)
2473 if ((uri->path[0] == '/') &&
2474 (((uri->path[1] >= 'A') && (uri->path[1] <= 'Z')) ||
2475 ((uri->path[1] >= 'a') && (uri->path[1] <= 'z'))) &&
2476 (uri->path[2] == ':'))
2477 *drive = uri->path[1];
2478#endif
2479
2480 *out = uri;
2481 uri = NULL;
2482 ret = 0;
2483
2484done:
2485 xmlFreeURI(uri);
2486 xmlFree(buf);
2487
2488 return(ret);
2489}
2490
2491/**
2492 * xmlBuildRelativeURISafe:
2493 * @URI: the URI reference under consideration
2494 * @base: the base value
2495 * @valPtr: pointer to result URI
2496 *
2497 * Expresses the URI of the reference in terms relative to the
2498 * base. Some examples of this operation include:
2499 * base = "http://site1.com/docs/book1.html"
2500 * URI input URI returned
2501 * docs/pic1.gif pic1.gif
2502 * docs/img/pic1.gif img/pic1.gif
2503 * img/pic1.gif ../img/pic1.gif
2504 * http://site1.com/docs/pic1.gif pic1.gif
2505 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2506 *
2507 * base = "docs/book1.html"
2508 * URI input URI returned
2509 * docs/pic1.gif pic1.gif
2510 * docs/img/pic1.gif img/pic1.gif
2511 * img/pic1.gif ../img/pic1.gif
2512 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2513 *
2514 *
2515 * Note: if the URI reference is really weird or complicated, it may be
2516 * worthwhile to first convert it into a "nice" one by calling
2517 * xmlBuildURI (using 'base') before calling this routine,
2518 * since this routine (for reasonable efficiency) assumes URI has
2519 * already been through some validation.
2520 *
2521 * Available since 2.13.0.
2522 *
2523 * Returns 0 on success, -1 if a memory allocation failed or an error
2524 * code if URI or base are invalid.
2525 */
2526int
2527xmlBuildRelativeURISafe(const xmlChar * URI, const xmlChar * base,
2528 xmlChar **valPtr)
2529{
2530 xmlChar *val = NULL;
2531 int ret = 0;
2532 int ix;
2533 int nbslash = 0;
2534 int len;
2535 xmlURIPtr ref = NULL;
2536 xmlURIPtr bas = NULL;
2537 const xmlChar *bptr, *uptr, *rptr;
2538 xmlChar *vptr;
2539 int remove_path = 0;
2540 int refDrive, baseDrive;
2541
2542 if (valPtr == NULL)
2543 return(1);
2544 *valPtr = NULL;
2545 if ((URI == NULL) || (*URI == 0))
2546 return(1);
2547
2548 ret = xmlParseUriOrPath((char *) URI, &ref, &refDrive);
2549 if (ret < 0)
2550 goto done;
2551 if (ret != 0) {
2552 /* Return URI if URI is invalid */
2553 ret = 0;
2554 val = xmlStrdup(URI);
2555 if (val == NULL)
2556 ret = -1;
2557 goto done;
2558 }
2559
2560 /* Return URI if base is empty */
2561 if ((base == NULL) || (*base == 0))
2562 goto done;
2563
2564 ret = xmlParseUriOrPath((char *) base, &bas, &baseDrive);
2565 if (ret < 0)
2566 goto done;
2567 if (ret != 0) {
2568 /* Return URI if base is invalid */
2569 ret = 0;
2570 goto done;
2571 }
2572
2573 /*
2574 * If the scheme / server on the URI differs from the base,
2575 * just return the URI
2576 */
2577 if ((xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2578 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2579 (bas->port != ref->port) ||
2580 (baseDrive != refDrive)) {
2581 goto done;
2582 }
2583 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2584 val = xmlStrdup(BAD_CAST "");
2585 if (val == NULL)
2586 ret = -1;
2587 goto done;
2588 }
2589 if (bas->path == NULL) {
2590 val = xmlStrdup((xmlChar *)ref->path);
2591 if (val == NULL) {
2592 ret = -1;
2593 goto done;
2594 }
2595 goto escape;
2596 }
2597 if (ref->path == NULL) {
2598 ref->path = (char *) "/";
2599 remove_path = 1;
2600 }
2601
2602 bptr = (xmlChar *) bas->path;
2603 rptr = (xmlChar *) ref->path;
2604
2605 /*
2606 * Return URI if URI and base aren't both absolute or relative.
2607 */
2608 if ((bptr[0] == '/') != (rptr[0] == '/'))
2609 goto done;
2610
2611 /*
2612 * At this point we can compare the two paths
2613 */
2614 {
2615 int pos = 0;
2616
2617 /*
2618 * Next we compare the two strings and find where they first differ
2619 */
2620 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2621 pos++;
2622
2623 if (bptr[pos] == rptr[pos]) {
2624 val = xmlStrdup(BAD_CAST "");
2625 if (val == NULL)
2626 ret = -1;
2627 goto done; /* (I can't imagine why anyone would do this) */
2628 }
2629
2630 /*
2631 * In URI, "back up" to the last '/' encountered. This will be the
2632 * beginning of the "unique" suffix of URI
2633 */
2634 ix = pos;
2635 for (; ix > 0; ix--) {
2636 if (rptr[ix - 1] == '/')
2637 break;
2638 }
2639 uptr = (xmlChar *)&rptr[ix];
2640
2641 /*
2642 * In base, count the number of '/' from the differing point
2643 */
2644 for (; bptr[ix] != 0; ix++) {
2645 if (bptr[ix] == '/')
2646 nbslash++;
2647 }
2648
2649 /*
2650 * e.g: URI="foo/" base="foo/bar" -> "./"
2651 */
2652 if (nbslash == 0 && !uptr[0]) {
2653 val = xmlStrdup(BAD_CAST "./");
2654 if (val == NULL)
2655 ret = -1;
2656 goto done;
2657 }
2658
2659 len = xmlStrlen (uptr) + 1;
2660 }
2661
2662 if (nbslash == 0) {
2663 if (uptr != NULL) {
2664 /* exception characters from xmlSaveUri */
2665 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2666 if (val == NULL)
2667 ret = -1;
2668 }
2669 goto done;
2670 }
2671
2672 /*
2673 * Allocate just enough space for the returned string -
2674 * length of the remainder of the URI, plus enough space
2675 * for the "../" groups, plus one for the terminator
2676 */
2677 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2678 if (val == NULL) {
2679 ret = -1;
2680 goto done;
2681 }
2682 vptr = val;
2683 /*
2684 * Put in as many "../" as needed
2685 */
2686 for (; nbslash>0; nbslash--) {
2687 *vptr++ = '.';
2688 *vptr++ = '.';
2689 *vptr++ = '/';
2690 }
2691 /*
2692 * Finish up with the end of the URI
2693 */
2694 if (uptr != NULL) {
2695 if ((vptr > val) && (len > 0) &&
2696 (uptr[0] == '/') && (vptr[-1] == '/')) {
2697 memcpy (vptr, uptr + 1, len - 1);
2698 vptr[len - 2] = 0;
2699 } else {
2700 memcpy (vptr, uptr, len);
2701 vptr[len - 1] = 0;
2702 }
2703 } else {
2704 vptr[len - 1] = 0;
2705 }
2706
2707escape:
2708 /* escape the freshly-built path */
2709 vptr = val;
2710 /* exception characters from xmlSaveUri */
2711 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2712 if (val == NULL)
2713 ret = -1;
2714 else
2715 ret = 0;
2716 xmlFree(vptr);
2717
2718done:
2719 if ((ret == 0) && (val == NULL)) {
2720 val = xmlSaveUri(ref);
2721 if (val == NULL)
2722 ret = -1;
2723 }
2724
2725 /*
2726 * Free the working variables
2727 */
2728 if (remove_path != 0)
2729 ref->path = NULL;
2730 if (ref != NULL)
2731 xmlFreeURI (ref);
2732 if (bas != NULL)
2733 xmlFreeURI (bas);
2734 if (ret != 0) {
2735 xmlFree(val);
2736 val = NULL;
2737 }
2738
2739 *valPtr = val;
2740 return(ret);
2741}
2742
2743/*
2744 * xmlBuildRelativeURI:
2745 * @URI: the URI reference under consideration
2746 * @base: the base value
2747 *
2748 * See xmlBuildRelativeURISafe.
2749 *
2750 * Returns a new URI string (to be freed by the caller) or NULL in case
2751 * error.
2752 */
2753xmlChar *
2754xmlBuildRelativeURI(const xmlChar * URI, const xmlChar * base)
2755{
2756 xmlChar *val;
2757
2758 xmlBuildRelativeURISafe(URI, base, &val);
2759 return(val);
2760}
2761
2762/**
2763 * xmlCanonicPath:
2764 * @path: the resource locator in a filesystem notation
2765 *
2766 * Prepares a path.
2767 *
2768 * If the path contains the substring "://", it is considered a
2769 * Legacy Extended IRI. Characters which aren't allowed in URIs are
2770 * escaped.
2771 *
2772 * Otherwise, the path is considered a filesystem path which is
2773 * copied without modification.
2774 *
2775 * The caller is responsible for freeing the memory occupied
2776 * by the returned string. If there is insufficient memory available, or the
2777 * argument is NULL, the function returns NULL.
2778 *
2779 * Returns the escaped path.
2780 */
2781xmlChar *
2782xmlCanonicPath(const xmlChar *path)
2783{
2784 xmlChar *ret;
2785
2786 if (path == NULL)
2787 return(NULL);
2788
2789 /* Check if this is an "absolute uri" */
2790 if (xmlStrstr(path, BAD_CAST "://") != NULL) {
2791 /*
2792 * Escape all characters except reserved, unreserved and the
2793 * percent sign.
2794 *
2795 * xmlURIEscapeStr already keeps unreserved characters, so we
2796 * pass gen-delims, sub-delims and "%" to ignore.
2797 */
2798 ret = xmlURIEscapeStr(path, BAD_CAST ":/?#[]@!$&()*+,;='%");
2799 } else {
2800 ret = xmlStrdup((const xmlChar *) path);
2801 }
2802
2803 return(ret);
2804}
2805
2806/**
2807 * xmlPathToURI:
2808 * @path: the resource locator in a filesystem notation
2809 *
2810 * Constructs an URI expressing the existing path
2811 *
2812 * Returns a new URI, or a duplicate of the path parameter if the
2813 * construction fails. The caller is responsible for freeing the memory
2814 * occupied by the returned string. If there is insufficient memory available,
2815 * or the argument is NULL, the function returns NULL.
2816 */
2817xmlChar *
2818xmlPathToURI(const xmlChar *path)
2819{
2820 return(xmlCanonicPath(path));
2821}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette