1 | /*
|
---|
2 | * parserInternals.c : Internal routines (and obsolete ones) needed for the
|
---|
3 | * XML and HTML parsers.
|
---|
4 | *
|
---|
5 | * See Copyright for the status of this software.
|
---|
6 | *
|
---|
7 | * daniel@veillard.com
|
---|
8 | */
|
---|
9 |
|
---|
10 | #define IN_LIBXML
|
---|
11 | #include "libxml.h"
|
---|
12 |
|
---|
13 | #if defined(_WIN32)
|
---|
14 | #define XML_DIR_SEP '\\'
|
---|
15 | #else
|
---|
16 | #define XML_DIR_SEP '/'
|
---|
17 | #endif
|
---|
18 |
|
---|
19 | #include <string.h>
|
---|
20 | #include <ctype.h>
|
---|
21 | #include <stdlib.h>
|
---|
22 |
|
---|
23 | #include <libxml/xmlmemory.h>
|
---|
24 | #include <libxml/tree.h>
|
---|
25 | #include <libxml/parser.h>
|
---|
26 | #include <libxml/parserInternals.h>
|
---|
27 | #include <libxml/entities.h>
|
---|
28 | #include <libxml/xmlerror.h>
|
---|
29 | #include <libxml/encoding.h>
|
---|
30 | #include <libxml/xmlIO.h>
|
---|
31 | #include <libxml/uri.h>
|
---|
32 | #include <libxml/dict.h>
|
---|
33 | #include <libxml/xmlsave.h>
|
---|
34 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
35 | #include <libxml/catalog.h>
|
---|
36 | #endif
|
---|
37 | #include <libxml/chvalid.h>
|
---|
38 | #include <libxml/nanohttp.h>
|
---|
39 |
|
---|
40 | #define CUR(ctxt) ctxt->input->cur
|
---|
41 | #define END(ctxt) ctxt->input->end
|
---|
42 |
|
---|
43 | #include "private/buf.h"
|
---|
44 | #include "private/enc.h"
|
---|
45 | #include "private/error.h"
|
---|
46 | #include "private/io.h"
|
---|
47 | #include "private/parser.h"
|
---|
48 |
|
---|
49 | #define XML_MAX_ERRORS 100
|
---|
50 |
|
---|
51 | /*
|
---|
52 | * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
|
---|
53 | * factor of serialized output after entity expansion.
|
---|
54 | */
|
---|
55 | #define XML_MAX_AMPLIFICATION_DEFAULT 5
|
---|
56 |
|
---|
57 | /*
|
---|
58 | * Various global defaults for parsing
|
---|
59 | */
|
---|
60 |
|
---|
61 | /**
|
---|
62 | * xmlCheckVersion:
|
---|
63 | * @version: the include version number
|
---|
64 | *
|
---|
65 | * check the compiled lib version against the include one.
|
---|
66 | */
|
---|
67 | void
|
---|
68 | xmlCheckVersion(int version) {
|
---|
69 | int myversion = LIBXML_VERSION;
|
---|
70 |
|
---|
71 | xmlInitParser();
|
---|
72 |
|
---|
73 | if ((myversion / 10000) != (version / 10000)) {
|
---|
74 | fprintf(stderr,
|
---|
75 | "Fatal: program compiled against libxml %d using libxml %d\n",
|
---|
76 | (version / 10000), (myversion / 10000));
|
---|
77 | } else if ((myversion / 100) < (version / 100)) {
|
---|
78 | fprintf(stderr,
|
---|
79 | "Warning: program compiled against libxml %d using older %d\n",
|
---|
80 | (version / 100), (myversion / 100));
|
---|
81 | }
|
---|
82 | }
|
---|
83 |
|
---|
84 |
|
---|
85 | /************************************************************************
|
---|
86 | * *
|
---|
87 | * Some factorized error routines *
|
---|
88 | * *
|
---|
89 | ************************************************************************/
|
---|
90 |
|
---|
91 |
|
---|
92 | /**
|
---|
93 | * xmlCtxtSetErrorHandler:
|
---|
94 | * @ctxt: an XML parser context
|
---|
95 | * @handler: error handler
|
---|
96 | * @data: data for error handler
|
---|
97 | *
|
---|
98 | * Register a callback function that will be called on errors and
|
---|
99 | * warnings. If handler is NULL, the error handler will be deactivated.
|
---|
100 | *
|
---|
101 | * This is the recommended way to collect errors from the parser and
|
---|
102 | * takes precedence over all other error reporting mechanisms.
|
---|
103 | * These are (in order of precedence):
|
---|
104 | *
|
---|
105 | * - per-context structured handler (xmlCtxtSetErrorHandler)
|
---|
106 | * - per-context structured "serror" SAX handler
|
---|
107 | * - global structured handler (xmlSetStructuredErrorFunc)
|
---|
108 | * - per-context generic "error" and "warning" SAX handlers
|
---|
109 | * - global generic handler (xmlSetGenericErrorFunc)
|
---|
110 | * - print to stderr
|
---|
111 | *
|
---|
112 | * Available since 2.13.0.
|
---|
113 | */
|
---|
114 | void
|
---|
115 | xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
|
---|
116 | void *data)
|
---|
117 | {
|
---|
118 | if (ctxt == NULL)
|
---|
119 | return;
|
---|
120 | ctxt->errorHandler = handler;
|
---|
121 | ctxt->errorCtxt = data;
|
---|
122 | }
|
---|
123 |
|
---|
124 | /**
|
---|
125 | * xmlCtxtErrMemory:
|
---|
126 | * @ctxt: an XML parser context
|
---|
127 | *
|
---|
128 | * Handle an out-of-memory error.
|
---|
129 | *
|
---|
130 | * Available since 2.13.0.
|
---|
131 | */
|
---|
132 | void
|
---|
133 | xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
|
---|
134 | {
|
---|
135 | xmlStructuredErrorFunc schannel = NULL;
|
---|
136 | xmlGenericErrorFunc channel = NULL;
|
---|
137 | void *data;
|
---|
138 |
|
---|
139 | if (ctxt == NULL)
|
---|
140 | return;
|
---|
141 |
|
---|
142 | ctxt->errNo = XML_ERR_NO_MEMORY;
|
---|
143 | ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
|
---|
144 | ctxt->wellFormed = 0;
|
---|
145 | ctxt->disableSAX = 2;
|
---|
146 |
|
---|
147 | if (ctxt->errorHandler) {
|
---|
148 | schannel = ctxt->errorHandler;
|
---|
149 | data = ctxt->errorCtxt;
|
---|
150 | } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
---|
151 | (ctxt->sax->serror != NULL)) {
|
---|
152 | schannel = ctxt->sax->serror;
|
---|
153 | data = ctxt->userData;
|
---|
154 | } else {
|
---|
155 | channel = ctxt->sax->error;
|
---|
156 | data = ctxt->userData;
|
---|
157 | }
|
---|
158 |
|
---|
159 | xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
|
---|
160 | &ctxt->lastError);
|
---|
161 | }
|
---|
162 |
|
---|
163 | /**
|
---|
164 | * xmlCtxtErrIO:
|
---|
165 | * @ctxt: parser context
|
---|
166 | * @code: xmlParserErrors code
|
---|
167 | * @uri: filename or URI (optional)
|
---|
168 | *
|
---|
169 | * If filename is empty, use the one from context input if available.
|
---|
170 | *
|
---|
171 | * Report an IO error to the parser context.
|
---|
172 | */
|
---|
173 | void
|
---|
174 | xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
|
---|
175 | {
|
---|
176 | const char *errstr, *msg, *str1, *str2;
|
---|
177 | xmlErrorLevel level;
|
---|
178 |
|
---|
179 | if (ctxt == NULL)
|
---|
180 | return;
|
---|
181 |
|
---|
182 | /*
|
---|
183 | * Only report a warning if a file could not be found. This should
|
---|
184 | * only be done for external entities, but the external entity loader
|
---|
185 | * of xsltproc can try multiple paths and assumes that ENOENT doesn't
|
---|
186 | * raise an error and aborts parsing.
|
---|
187 | */
|
---|
188 | if (((code == XML_IO_ENOENT) ||
|
---|
189 | (code == XML_IO_NETWORK_ATTEMPT) ||
|
---|
190 | (code == XML_IO_UNKNOWN))) {
|
---|
191 | if (ctxt->validate == 0)
|
---|
192 | level = XML_ERR_WARNING;
|
---|
193 | else
|
---|
194 | level = XML_ERR_ERROR;
|
---|
195 | } else {
|
---|
196 | level = XML_ERR_FATAL;
|
---|
197 | }
|
---|
198 |
|
---|
199 | errstr = xmlErrString(code);
|
---|
200 |
|
---|
201 | if (uri == NULL) {
|
---|
202 | msg = "%s\n";
|
---|
203 | str1 = errstr;
|
---|
204 | str2 = NULL;
|
---|
205 | } else {
|
---|
206 | msg = "failed to load \"%s\": %s\n";
|
---|
207 | str1 = uri;
|
---|
208 | str2 = errstr;
|
---|
209 | }
|
---|
210 |
|
---|
211 | xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
|
---|
212 | (const xmlChar *) uri, NULL, NULL, 0,
|
---|
213 | msg, str1, str2);
|
---|
214 | }
|
---|
215 |
|
---|
216 | /**
|
---|
217 | * xmlCtxtVErr:
|
---|
218 | * @ctxt: a parser context
|
---|
219 | * @node: the current node or NULL
|
---|
220 | * @domain: the domain for the error
|
---|
221 | * @code: the code for the error
|
---|
222 | * @level: the xmlErrorLevel for the error
|
---|
223 | * @str1: extra string info
|
---|
224 | * @str2: extra string info
|
---|
225 | * @str3: extra string info
|
---|
226 | * @int1: extra int info
|
---|
227 | * @msg: the message to display/transmit
|
---|
228 | * @ap: extra parameters for the message display
|
---|
229 | *
|
---|
230 | * Raise a parser error.
|
---|
231 | */
|
---|
232 | void
|
---|
233 | xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
---|
234 | xmlParserErrors code, xmlErrorLevel level,
|
---|
235 | const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
---|
236 | int int1, const char *msg, va_list ap)
|
---|
237 | {
|
---|
238 | xmlStructuredErrorFunc schannel = NULL;
|
---|
239 | xmlGenericErrorFunc channel = NULL;
|
---|
240 | void *data = NULL;
|
---|
241 | const char *file = NULL;
|
---|
242 | int line = 0;
|
---|
243 | int col = 0;
|
---|
244 | int res;
|
---|
245 |
|
---|
246 | if (code == XML_ERR_NO_MEMORY) {
|
---|
247 | xmlCtxtErrMemory(ctxt);
|
---|
248 | return;
|
---|
249 | }
|
---|
250 |
|
---|
251 | if (PARSER_STOPPED(ctxt))
|
---|
252 | return;
|
---|
253 |
|
---|
254 | if (level == XML_ERR_WARNING) {
|
---|
255 | if (ctxt->nbWarnings >= XML_MAX_ERRORS)
|
---|
256 | return;
|
---|
257 | ctxt->nbWarnings += 1;
|
---|
258 | } else {
|
---|
259 | if (ctxt->nbErrors >= XML_MAX_ERRORS)
|
---|
260 | return;
|
---|
261 | ctxt->nbErrors += 1;
|
---|
262 | }
|
---|
263 |
|
---|
264 | if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
|
---|
265 | ((level != XML_ERR_WARNING) ||
|
---|
266 | ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
|
---|
267 | if (ctxt->errorHandler) {
|
---|
268 | schannel = ctxt->errorHandler;
|
---|
269 | data = ctxt->errorCtxt;
|
---|
270 | } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
---|
271 | (ctxt->sax->serror != NULL)) {
|
---|
272 | schannel = ctxt->sax->serror;
|
---|
273 | data = ctxt->userData;
|
---|
274 | } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
|
---|
275 | if (level == XML_ERR_WARNING)
|
---|
276 | channel = ctxt->vctxt.warning;
|
---|
277 | else
|
---|
278 | channel = ctxt->vctxt.error;
|
---|
279 | data = ctxt->vctxt.userData;
|
---|
280 | } else {
|
---|
281 | if (level == XML_ERR_WARNING)
|
---|
282 | channel = ctxt->sax->warning;
|
---|
283 | else
|
---|
284 | channel = ctxt->sax->error;
|
---|
285 | data = ctxt->userData;
|
---|
286 | }
|
---|
287 | }
|
---|
288 |
|
---|
289 | if (ctxt->input != NULL) {
|
---|
290 | xmlParserInputPtr input = ctxt->input;
|
---|
291 |
|
---|
292 | if ((input->filename == NULL) &&
|
---|
293 | (ctxt->inputNr > 1)) {
|
---|
294 | input = ctxt->inputTab[ctxt->inputNr - 2];
|
---|
295 | }
|
---|
296 | file = input->filename;
|
---|
297 | line = input->line;
|
---|
298 | col = input->col;
|
---|
299 | }
|
---|
300 |
|
---|
301 | res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
|
---|
302 | level, file, line, (const char *) str1,
|
---|
303 | (const char *) str2, (const char *) str3, int1, col,
|
---|
304 | msg, ap);
|
---|
305 |
|
---|
306 | if (res < 0) {
|
---|
307 | xmlCtxtErrMemory(ctxt);
|
---|
308 | return;
|
---|
309 | }
|
---|
310 |
|
---|
311 | if (level >= XML_ERR_ERROR)
|
---|
312 | ctxt->errNo = code;
|
---|
313 | if (level == XML_ERR_FATAL) {
|
---|
314 | ctxt->wellFormed = 0;
|
---|
315 | if (ctxt->recovery == 0)
|
---|
316 | ctxt->disableSAX = 1;
|
---|
317 | }
|
---|
318 |
|
---|
319 | return;
|
---|
320 | }
|
---|
321 |
|
---|
322 | /**
|
---|
323 | * xmlCtxtErr:
|
---|
324 | * @ctxt: a parser context
|
---|
325 | * @node: the current node or NULL
|
---|
326 | * @domain: the domain for the error
|
---|
327 | * @code: the code for the error
|
---|
328 | * @level: the xmlErrorLevel for the error
|
---|
329 | * @str1: extra string info
|
---|
330 | * @str2: extra string info
|
---|
331 | * @str3: extra string info
|
---|
332 | * @int1: extra int info
|
---|
333 | * @msg: the message to display/transmit
|
---|
334 | * @...: extra parameters for the message display
|
---|
335 | *
|
---|
336 | * Raise a parser error.
|
---|
337 | */
|
---|
338 | void
|
---|
339 | xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
---|
340 | xmlParserErrors code, xmlErrorLevel level,
|
---|
341 | const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
---|
342 | int int1, const char *msg, ...)
|
---|
343 | {
|
---|
344 | va_list ap;
|
---|
345 |
|
---|
346 | va_start(ap, msg);
|
---|
347 | xmlCtxtVErr(ctxt, node, domain, code, level,
|
---|
348 | str1, str2, str3, int1, msg, ap);
|
---|
349 | va_end(ap);
|
---|
350 | }
|
---|
351 |
|
---|
352 | /**
|
---|
353 | * xmlFatalErr:
|
---|
354 | * @ctxt: an XML parser context
|
---|
355 | * @code: the error number
|
---|
356 | * @info: extra information string
|
---|
357 | *
|
---|
358 | * Handle a fatal parser error, i.e. violating Well-Formedness constraints
|
---|
359 | */
|
---|
360 | void
|
---|
361 | xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
|
---|
362 | {
|
---|
363 | const char *errmsg;
|
---|
364 | xmlErrorLevel level;
|
---|
365 |
|
---|
366 | if (code == XML_ERR_UNSUPPORTED_ENCODING)
|
---|
367 | level = XML_ERR_WARNING;
|
---|
368 | else
|
---|
369 | level = XML_ERR_FATAL;
|
---|
370 |
|
---|
371 | errmsg = xmlErrString(code);
|
---|
372 |
|
---|
373 | if (info == NULL) {
|
---|
374 | xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
|
---|
375 | NULL, NULL, NULL, 0, "%s\n", errmsg);
|
---|
376 | } else {
|
---|
377 | xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
|
---|
378 | (const xmlChar *) info, NULL, NULL, 0,
|
---|
379 | "%s: %s\n", errmsg, info);
|
---|
380 | }
|
---|
381 | }
|
---|
382 |
|
---|
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
396 |
|
---|
397 | /************************************************************************
|
---|
398 | * *
|
---|
399 | * Input handling functions for progressive parsing *
|
---|
400 | * *
|
---|
401 | ************************************************************************/
|
---|
402 |
|
---|
403 | /* we need to keep enough input to show errors in context */
|
---|
404 | #define LINE_LEN 80
|
---|
405 |
|
---|
406 | /**
|
---|
407 | * xmlHaltParser:
|
---|
408 | * @ctxt: an XML parser context
|
---|
409 | *
|
---|
410 | * Blocks further parser processing don't override error
|
---|
411 | * for internal use
|
---|
412 | */
|
---|
413 | void
|
---|
414 | xmlHaltParser(xmlParserCtxtPtr ctxt) {
|
---|
415 | if (ctxt == NULL)
|
---|
416 | return;
|
---|
417 | ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
|
---|
418 | ctxt->disableSAX = 2;
|
---|
419 | }
|
---|
420 |
|
---|
421 | /**
|
---|
422 | * xmlParserInputRead:
|
---|
423 | * @in: an XML parser input
|
---|
424 | * @len: an indicative size for the lookahead
|
---|
425 | *
|
---|
426 | * DEPRECATED: This function was internal and is deprecated.
|
---|
427 | *
|
---|
428 | * Returns -1 as this is an error to use it.
|
---|
429 | */
|
---|
430 | int
|
---|
431 | xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
|
---|
432 | return(-1);
|
---|
433 | }
|
---|
434 |
|
---|
435 | /**
|
---|
436 | * xmlParserGrow:
|
---|
437 | * @ctxt: an XML parser context
|
---|
438 | *
|
---|
439 | * Grow the input buffer.
|
---|
440 | *
|
---|
441 | * Returns the number of bytes read or -1 in case of error.
|
---|
442 | */
|
---|
443 | int
|
---|
444 | xmlParserGrow(xmlParserCtxtPtr ctxt) {
|
---|
445 | xmlParserInputPtr in = ctxt->input;
|
---|
446 | xmlParserInputBufferPtr buf = in->buf;
|
---|
447 | size_t curEnd = in->end - in->cur;
|
---|
448 | size_t curBase = in->cur - in->base;
|
---|
449 | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
|
---|
450 | XML_MAX_HUGE_LENGTH :
|
---|
451 | XML_MAX_LOOKUP_LIMIT;
|
---|
452 | int ret;
|
---|
453 |
|
---|
454 | if (buf == NULL)
|
---|
455 | return(0);
|
---|
456 | /* Don't grow push parser buffer. */
|
---|
457 | if (PARSER_PROGRESSIVE(ctxt))
|
---|
458 | return(0);
|
---|
459 | /* Don't grow memory buffers. */
|
---|
460 | if ((buf->encoder == NULL) && (buf->readcallback == NULL))
|
---|
461 | return(0);
|
---|
462 | if (buf->error != 0)
|
---|
463 | return(-1);
|
---|
464 |
|
---|
465 | if (curBase > maxLength) {
|
---|
466 | xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
|
---|
467 | "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
|
---|
468 | xmlHaltParser(ctxt);
|
---|
469 | return(-1);
|
---|
470 | }
|
---|
471 |
|
---|
472 | if (curEnd >= INPUT_CHUNK)
|
---|
473 | return(0);
|
---|
474 |
|
---|
475 | ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
|
---|
476 | xmlBufUpdateInput(buf->buffer, in, curBase);
|
---|
477 |
|
---|
478 | if (ret < 0) {
|
---|
479 | xmlCtxtErrIO(ctxt, buf->error, NULL);
|
---|
480 | }
|
---|
481 |
|
---|
482 | return(ret);
|
---|
483 | }
|
---|
484 |
|
---|
485 | /**
|
---|
486 | * xmlParserInputGrow:
|
---|
487 | * @in: an XML parser input
|
---|
488 | * @len: an indicative size for the lookahead
|
---|
489 | *
|
---|
490 | * DEPRECATED: Don't use.
|
---|
491 | *
|
---|
492 | * This function increase the input for the parser. It tries to
|
---|
493 | * preserve pointers to the input buffer, and keep already read data
|
---|
494 | *
|
---|
495 | * Returns the amount of char read, or -1 in case of error, 0 indicate the
|
---|
496 | * end of this entity
|
---|
497 | */
|
---|
498 | int
|
---|
499 | xmlParserInputGrow(xmlParserInputPtr in, int len) {
|
---|
500 | int ret;
|
---|
501 | size_t indx;
|
---|
502 |
|
---|
503 | if ((in == NULL) || (len < 0)) return(-1);
|
---|
504 | if (in->buf == NULL) return(-1);
|
---|
505 | if (in->base == NULL) return(-1);
|
---|
506 | if (in->cur == NULL) return(-1);
|
---|
507 | if (in->buf->buffer == NULL) return(-1);
|
---|
508 |
|
---|
509 | /* Don't grow memory buffers. */
|
---|
510 | if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
|
---|
511 | return(0);
|
---|
512 |
|
---|
513 | indx = in->cur - in->base;
|
---|
514 | if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
|
---|
515 | return(0);
|
---|
516 | }
|
---|
517 | ret = xmlParserInputBufferGrow(in->buf, len);
|
---|
518 |
|
---|
519 | in->base = xmlBufContent(in->buf->buffer);
|
---|
520 | if (in->base == NULL) {
|
---|
521 | in->base = BAD_CAST "";
|
---|
522 | in->cur = in->base;
|
---|
523 | in->end = in->base;
|
---|
524 | return(-1);
|
---|
525 | }
|
---|
526 | in->cur = in->base + indx;
|
---|
527 | in->end = xmlBufEnd(in->buf->buffer);
|
---|
528 |
|
---|
529 | return(ret);
|
---|
530 | }
|
---|
531 |
|
---|
532 | /**
|
---|
533 | * xmlParserShrink:
|
---|
534 | * @ctxt: an XML parser context
|
---|
535 | *
|
---|
536 | * Shrink the input buffer.
|
---|
537 | */
|
---|
538 | void
|
---|
539 | xmlParserShrink(xmlParserCtxtPtr ctxt) {
|
---|
540 | xmlParserInputPtr in = ctxt->input;
|
---|
541 | xmlParserInputBufferPtr buf = in->buf;
|
---|
542 | size_t used;
|
---|
543 |
|
---|
544 | if (buf == NULL)
|
---|
545 | return;
|
---|
546 | /* Don't shrink pull parser memory buffers. */
|
---|
547 | if ((!PARSER_PROGRESSIVE(ctxt)) &&
|
---|
548 | (buf->encoder == NULL) &&
|
---|
549 | (buf->readcallback == NULL))
|
---|
550 | return;
|
---|
551 |
|
---|
552 | used = in->cur - in->base;
|
---|
553 | /*
|
---|
554 | * Do not shrink on large buffers whose only a tiny fraction
|
---|
555 | * was consumed
|
---|
556 | */
|
---|
557 | if (used > INPUT_CHUNK) {
|
---|
558 | size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
|
---|
559 |
|
---|
560 | if (res > 0) {
|
---|
561 | used -= res;
|
---|
562 | if ((res > ULONG_MAX) ||
|
---|
563 | (in->consumed > ULONG_MAX - (unsigned long)res))
|
---|
564 | in->consumed = ULONG_MAX;
|
---|
565 | else
|
---|
566 | in->consumed += res;
|
---|
567 | }
|
---|
568 | }
|
---|
569 |
|
---|
570 | xmlBufUpdateInput(buf->buffer, in, used);
|
---|
571 | }
|
---|
572 |
|
---|
573 | /**
|
---|
574 | * xmlParserInputShrink:
|
---|
575 | * @in: an XML parser input
|
---|
576 | *
|
---|
577 | * DEPRECATED: Don't use.
|
---|
578 | *
|
---|
579 | * This function removes used input for the parser.
|
---|
580 | */
|
---|
581 | void
|
---|
582 | xmlParserInputShrink(xmlParserInputPtr in) {
|
---|
583 | size_t used;
|
---|
584 | size_t ret;
|
---|
585 |
|
---|
586 | if (in == NULL) return;
|
---|
587 | if (in->buf == NULL) return;
|
---|
588 | if (in->base == NULL) return;
|
---|
589 | if (in->cur == NULL) return;
|
---|
590 | if (in->buf->buffer == NULL) return;
|
---|
591 |
|
---|
592 | used = in->cur - in->base;
|
---|
593 | /*
|
---|
594 | * Do not shrink on large buffers whose only a tiny fraction
|
---|
595 | * was consumed
|
---|
596 | */
|
---|
597 | if (used > INPUT_CHUNK) {
|
---|
598 | ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
|
---|
599 | if (ret > 0) {
|
---|
600 | used -= ret;
|
---|
601 | if ((ret > ULONG_MAX) ||
|
---|
602 | (in->consumed > ULONG_MAX - (unsigned long)ret))
|
---|
603 | in->consumed = ULONG_MAX;
|
---|
604 | else
|
---|
605 | in->consumed += ret;
|
---|
606 | }
|
---|
607 | }
|
---|
608 |
|
---|
609 | if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
|
---|
610 | xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
|
---|
611 | }
|
---|
612 |
|
---|
613 | in->base = xmlBufContent(in->buf->buffer);
|
---|
614 | if (in->base == NULL) {
|
---|
615 | /* TODO: raise error */
|
---|
616 | in->base = BAD_CAST "";
|
---|
617 | in->cur = in->base;
|
---|
618 | in->end = in->base;
|
---|
619 | return;
|
---|
620 | }
|
---|
621 | in->cur = in->base + used;
|
---|
622 | in->end = xmlBufEnd(in->buf->buffer);
|
---|
623 | }
|
---|
624 |
|
---|
625 | /************************************************************************
|
---|
626 | * *
|
---|
627 | * UTF8 character input and related functions *
|
---|
628 | * *
|
---|
629 | ************************************************************************/
|
---|
630 |
|
---|
631 | /**
|
---|
632 | * xmlNextChar:
|
---|
633 | * @ctxt: the XML parser context
|
---|
634 | *
|
---|
635 | * DEPRECATED: Internal function, do not use.
|
---|
636 | *
|
---|
637 | * Skip to the next char input char.
|
---|
638 | */
|
---|
639 |
|
---|
640 | void
|
---|
641 | xmlNextChar(xmlParserCtxtPtr ctxt)
|
---|
642 | {
|
---|
643 | const unsigned char *cur;
|
---|
644 | size_t avail;
|
---|
645 | int c;
|
---|
646 |
|
---|
647 | if ((ctxt == NULL) || (ctxt->input == NULL))
|
---|
648 | return;
|
---|
649 |
|
---|
650 | avail = ctxt->input->end - ctxt->input->cur;
|
---|
651 |
|
---|
652 | if (avail < INPUT_CHUNK) {
|
---|
653 | xmlParserGrow(ctxt);
|
---|
654 | if (ctxt->input->cur >= ctxt->input->end)
|
---|
655 | return;
|
---|
656 | avail = ctxt->input->end - ctxt->input->cur;
|
---|
657 | }
|
---|
658 |
|
---|
659 | cur = ctxt->input->cur;
|
---|
660 | c = *cur;
|
---|
661 |
|
---|
662 | if (c < 0x80) {
|
---|
663 | if (c == '\n') {
|
---|
664 | ctxt->input->cur++;
|
---|
665 | ctxt->input->line++;
|
---|
666 | ctxt->input->col = 1;
|
---|
667 | } else if (c == '\r') {
|
---|
668 | /*
|
---|
669 | * 2.11 End-of-Line Handling
|
---|
670 | * the literal two-character sequence "#xD#xA" or a standalone
|
---|
671 | * literal #xD, an XML processor must pass to the application
|
---|
672 | * the single character #xA.
|
---|
673 | */
|
---|
674 | ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
|
---|
675 | ctxt->input->line++;
|
---|
676 | ctxt->input->col = 1;
|
---|
677 | return;
|
---|
678 | } else {
|
---|
679 | ctxt->input->cur++;
|
---|
680 | ctxt->input->col++;
|
---|
681 | }
|
---|
682 | } else {
|
---|
683 | ctxt->input->col++;
|
---|
684 |
|
---|
685 | if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
|
---|
686 | goto encoding_error;
|
---|
687 |
|
---|
688 | if (c < 0xe0) {
|
---|
689 | /* 2-byte code */
|
---|
690 | if (c < 0xc2)
|
---|
691 | goto encoding_error;
|
---|
692 | ctxt->input->cur += 2;
|
---|
693 | } else {
|
---|
694 | unsigned int val = (c << 8) | cur[1];
|
---|
695 |
|
---|
696 | if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
|
---|
697 | goto encoding_error;
|
---|
698 |
|
---|
699 | if (c < 0xf0) {
|
---|
700 | /* 3-byte code */
|
---|
701 | if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
|
---|
702 | goto encoding_error;
|
---|
703 | ctxt->input->cur += 3;
|
---|
704 | } else {
|
---|
705 | if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
|
---|
706 | goto encoding_error;
|
---|
707 |
|
---|
708 | /* 4-byte code */
|
---|
709 | if ((val < 0xf090) || (val >= 0xf490))
|
---|
710 | goto encoding_error;
|
---|
711 | ctxt->input->cur += 4;
|
---|
712 | }
|
---|
713 | }
|
---|
714 | }
|
---|
715 |
|
---|
716 | return;
|
---|
717 |
|
---|
718 | encoding_error:
|
---|
719 | /* Only report the first error */
|
---|
720 | if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
|
---|
721 | xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
|
---|
722 | ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
|
---|
723 | }
|
---|
724 | ctxt->input->cur++;
|
---|
725 | return;
|
---|
726 | }
|
---|
727 |
|
---|
728 | /**
|
---|
729 | * xmlCurrentChar:
|
---|
730 | * @ctxt: the XML parser context
|
---|
731 | * @len: pointer to the length of the char read
|
---|
732 | *
|
---|
733 | * DEPRECATED: Internal function, do not use.
|
---|
734 | *
|
---|
735 | * The current char value, if using UTF-8 this may actually span multiple
|
---|
736 | * bytes in the input buffer. Implement the end of line normalization:
|
---|
737 | * 2.11 End-of-Line Handling
|
---|
738 | * Wherever an external parsed entity or the literal entity value
|
---|
739 | * of an internal parsed entity contains either the literal two-character
|
---|
740 | * sequence "#xD#xA" or a standalone literal #xD, an XML processor
|
---|
741 | * must pass to the application the single character #xA.
|
---|
742 | * This behavior can conveniently be produced by normalizing all
|
---|
743 | * line breaks to #xA on input, before parsing.)
|
---|
744 | *
|
---|
745 | * Returns the current char value and its length
|
---|
746 | */
|
---|
747 |
|
---|
748 | int
|
---|
749 | xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
---|
750 | const unsigned char *cur;
|
---|
751 | size_t avail;
|
---|
752 | int c;
|
---|
753 |
|
---|
754 | if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
|
---|
755 |
|
---|
756 | avail = ctxt->input->end - ctxt->input->cur;
|
---|
757 |
|
---|
758 | if (avail < INPUT_CHUNK) {
|
---|
759 | xmlParserGrow(ctxt);
|
---|
760 | avail = ctxt->input->end - ctxt->input->cur;
|
---|
761 | }
|
---|
762 |
|
---|
763 | cur = ctxt->input->cur;
|
---|
764 | c = *cur;
|
---|
765 |
|
---|
766 | if (c < 0x80) {
|
---|
767 | /* 1-byte code */
|
---|
768 | if (c < 0x20) {
|
---|
769 | /*
|
---|
770 | * 2.11 End-of-Line Handling
|
---|
771 | * the literal two-character sequence "#xD#xA" or a standalone
|
---|
772 | * literal #xD, an XML processor must pass to the application
|
---|
773 | * the single character #xA.
|
---|
774 | */
|
---|
775 | if (c == '\r') {
|
---|
776 | /*
|
---|
777 | * TODO: This function shouldn't change the 'cur' pointer
|
---|
778 | * as side effect, but the NEXTL macro in parser.c relies
|
---|
779 | * on this behavior when incrementing line numbers.
|
---|
780 | */
|
---|
781 | if (cur[1] == '\n')
|
---|
782 | ctxt->input->cur++;
|
---|
783 | *len = 1;
|
---|
784 | c = '\n';
|
---|
785 | } else if (c == 0) {
|
---|
786 | if (ctxt->input->cur >= ctxt->input->end) {
|
---|
787 | *len = 0;
|
---|
788 | } else {
|
---|
789 | *len = 1;
|
---|
790 | /*
|
---|
791 | * TODO: Null bytes should be handled by callers,
|
---|
792 | * but this can be tricky.
|
---|
793 | */
|
---|
794 | xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
|
---|
795 | "Char 0x0 out of allowed range\n");
|
---|
796 | }
|
---|
797 | } else {
|
---|
798 | *len = 1;
|
---|
799 | }
|
---|
800 | } else {
|
---|
801 | *len = 1;
|
---|
802 | }
|
---|
803 |
|
---|
804 | return(c);
|
---|
805 | } else {
|
---|
806 | int val;
|
---|
807 |
|
---|
808 | if (avail < 2)
|
---|
809 | goto incomplete_sequence;
|
---|
810 | if ((cur[1] & 0xc0) != 0x80)
|
---|
811 | goto encoding_error;
|
---|
812 |
|
---|
813 | if (c < 0xe0) {
|
---|
814 | /* 2-byte code */
|
---|
815 | if (c < 0xc2)
|
---|
816 | goto encoding_error;
|
---|
817 | val = (c & 0x1f) << 6;
|
---|
818 | val |= cur[1] & 0x3f;
|
---|
819 | *len = 2;
|
---|
820 | } else {
|
---|
821 | if (avail < 3)
|
---|
822 | goto incomplete_sequence;
|
---|
823 | if ((cur[2] & 0xc0) != 0x80)
|
---|
824 | goto encoding_error;
|
---|
825 |
|
---|
826 | if (c < 0xf0) {
|
---|
827 | /* 3-byte code */
|
---|
828 | val = (c & 0xf) << 12;
|
---|
829 | val |= (cur[1] & 0x3f) << 6;
|
---|
830 | val |= cur[2] & 0x3f;
|
---|
831 | if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
|
---|
832 | goto encoding_error;
|
---|
833 | *len = 3;
|
---|
834 | } else {
|
---|
835 | if (avail < 4)
|
---|
836 | goto incomplete_sequence;
|
---|
837 | if ((cur[3] & 0xc0) != 0x80)
|
---|
838 | goto encoding_error;
|
---|
839 |
|
---|
840 | /* 4-byte code */
|
---|
841 | val = (c & 0x0f) << 18;
|
---|
842 | val |= (cur[1] & 0x3f) << 12;
|
---|
843 | val |= (cur[2] & 0x3f) << 6;
|
---|
844 | val |= cur[3] & 0x3f;
|
---|
845 | if ((val < 0x10000) || (val >= 0x110000))
|
---|
846 | goto encoding_error;
|
---|
847 | *len = 4;
|
---|
848 | }
|
---|
849 | }
|
---|
850 |
|
---|
851 | return(val);
|
---|
852 | }
|
---|
853 |
|
---|
854 | encoding_error:
|
---|
855 | /* Only report the first error */
|
---|
856 | if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
|
---|
857 | xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
|
---|
858 | ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
|
---|
859 | }
|
---|
860 | *len = 1;
|
---|
861 | return(0xFFFD); /* U+FFFD Replacement Character */
|
---|
862 |
|
---|
863 | incomplete_sequence:
|
---|
864 | /*
|
---|
865 | * An encoding problem may arise from a truncated input buffer
|
---|
866 | * splitting a character in the middle. In that case do not raise
|
---|
867 | * an error but return 0. This should only happen when push parsing
|
---|
868 | * char data.
|
---|
869 | */
|
---|
870 | *len = 0;
|
---|
871 | return(0);
|
---|
872 | }
|
---|
873 |
|
---|
874 | /**
|
---|
875 | * xmlStringCurrentChar:
|
---|
876 | * @ctxt: the XML parser context
|
---|
877 | * @cur: pointer to the beginning of the char
|
---|
878 | * @len: pointer to the length of the char read
|
---|
879 | *
|
---|
880 | * DEPRECATED: Internal function, do not use.
|
---|
881 | *
|
---|
882 | * The current char value, if using UTF-8 this may actually span multiple
|
---|
883 | * bytes in the input buffer.
|
---|
884 | *
|
---|
885 | * Returns the current char value and its length
|
---|
886 | */
|
---|
887 |
|
---|
888 | int
|
---|
889 | xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
|
---|
890 | const xmlChar *cur, int *len) {
|
---|
891 | int c;
|
---|
892 |
|
---|
893 | if ((cur == NULL) || (len == NULL))
|
---|
894 | return(0);
|
---|
895 |
|
---|
896 | /* cur is zero-terminated, so we can lie about its length. */
|
---|
897 | *len = 4;
|
---|
898 | c = xmlGetUTF8Char(cur, len);
|
---|
899 |
|
---|
900 | return((c < 0) ? 0 : c);
|
---|
901 | }
|
---|
902 |
|
---|
903 | /**
|
---|
904 | * xmlCopyCharMultiByte:
|
---|
905 | * @out: pointer to an array of xmlChar
|
---|
906 | * @val: the char value
|
---|
907 | *
|
---|
908 | * append the char value in the array
|
---|
909 | *
|
---|
910 | * Returns the number of xmlChar written
|
---|
911 | */
|
---|
912 | int
|
---|
913 | xmlCopyCharMultiByte(xmlChar *out, int val) {
|
---|
914 | if ((out == NULL) || (val < 0)) return(0);
|
---|
915 | /*
|
---|
916 | * We are supposed to handle UTF8, check it's valid
|
---|
917 | * From rfc2044: encoding of the Unicode values on UTF-8:
|
---|
918 | *
|
---|
919 | * UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
---|
920 | * 0000 0000-0000 007F 0xxxxxxx
|
---|
921 | * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
---|
922 | * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
---|
923 | */
|
---|
924 | if (val >= 0x80) {
|
---|
925 | xmlChar *savedout = out;
|
---|
926 | int bits;
|
---|
927 | if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
|
---|
928 | else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
|
---|
929 | else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
|
---|
930 | else {
|
---|
931 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
932 | fprintf(stderr, "xmlCopyCharMultiByte: codepoint out of range\n");
|
---|
933 | abort();
|
---|
934 | #endif
|
---|
935 | return(0);
|
---|
936 | }
|
---|
937 | for ( ; bits >= 0; bits-= 6)
|
---|
938 | *out++= ((val >> bits) & 0x3F) | 0x80 ;
|
---|
939 | return (out - savedout);
|
---|
940 | }
|
---|
941 | *out = val;
|
---|
942 | return 1;
|
---|
943 | }
|
---|
944 |
|
---|
945 | /**
|
---|
946 | * xmlCopyChar:
|
---|
947 | * @len: Ignored, compatibility
|
---|
948 | * @out: pointer to an array of xmlChar
|
---|
949 | * @val: the char value
|
---|
950 | *
|
---|
951 | * append the char value in the array
|
---|
952 | *
|
---|
953 | * Returns the number of xmlChar written
|
---|
954 | */
|
---|
955 |
|
---|
956 | int
|
---|
957 | xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
|
---|
958 | if ((out == NULL) || (val < 0)) return(0);
|
---|
959 | /* the len parameter is ignored */
|
---|
960 | if (val >= 0x80) {
|
---|
961 | return(xmlCopyCharMultiByte (out, val));
|
---|
962 | }
|
---|
963 | *out = val;
|
---|
964 | return 1;
|
---|
965 | }
|
---|
966 |
|
---|
967 | /************************************************************************
|
---|
968 | * *
|
---|
969 | * Commodity functions to switch encodings *
|
---|
970 | * *
|
---|
971 | ************************************************************************/
|
---|
972 |
|
---|
973 | static int
|
---|
974 | xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
|
---|
975 | xmlChar out[200];
|
---|
976 | xmlCharEncodingHandlerPtr handler;
|
---|
977 | int inlen, outlen, res, i;
|
---|
978 |
|
---|
979 | *hout = NULL;
|
---|
980 |
|
---|
981 | /*
|
---|
982 | * To detect the EBCDIC code page, we convert the first 200 bytes
|
---|
983 | * to EBCDIC-US and try to find the encoding declaration.
|
---|
984 | */
|
---|
985 | res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
|
---|
986 | if (res != 0)
|
---|
987 | return(res);
|
---|
988 | outlen = sizeof(out) - 1;
|
---|
989 | inlen = input->end - input->cur;
|
---|
990 | res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
|
---|
991 | /*
|
---|
992 | * Return the EBCDIC handler if decoding failed. The error will
|
---|
993 | * be reported later.
|
---|
994 | */
|
---|
995 | if (res < 0)
|
---|
996 | goto done;
|
---|
997 | out[outlen] = 0;
|
---|
998 |
|
---|
999 | for (i = 0; i < outlen; i++) {
|
---|
1000 | if (out[i] == '>')
|
---|
1001 | break;
|
---|
1002 | if ((out[i] == 'e') &&
|
---|
1003 | (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
|
---|
1004 | int start, cur, quote;
|
---|
1005 |
|
---|
1006 | i += 8;
|
---|
1007 | while (IS_BLANK_CH(out[i]))
|
---|
1008 | i += 1;
|
---|
1009 | if (out[i++] != '=')
|
---|
1010 | break;
|
---|
1011 | while (IS_BLANK_CH(out[i]))
|
---|
1012 | i += 1;
|
---|
1013 | quote = out[i++];
|
---|
1014 | if ((quote != '\'') && (quote != '"'))
|
---|
1015 | break;
|
---|
1016 | start = i;
|
---|
1017 | cur = out[i];
|
---|
1018 | while (((cur >= 'a') && (cur <= 'z')) ||
|
---|
1019 | ((cur >= 'A') && (cur <= 'Z')) ||
|
---|
1020 | ((cur >= '0') && (cur <= '9')) ||
|
---|
1021 | (cur == '.') || (cur == '_') ||
|
---|
1022 | (cur == '-'))
|
---|
1023 | cur = out[++i];
|
---|
1024 | if (cur != quote)
|
---|
1025 | break;
|
---|
1026 | out[i] = 0;
|
---|
1027 | xmlCharEncCloseFunc(handler);
|
---|
1028 | res = xmlOpenCharEncodingHandler((char *) out + start,
|
---|
1029 | /* output */ 0, &handler);
|
---|
1030 | if (res != 0)
|
---|
1031 | return(res);
|
---|
1032 | *hout = handler;
|
---|
1033 | return(0);
|
---|
1034 | }
|
---|
1035 | }
|
---|
1036 |
|
---|
1037 | done:
|
---|
1038 | /*
|
---|
1039 | * Encoding handlers are stateful, so we have to recreate them.
|
---|
1040 | */
|
---|
1041 | xmlCharEncCloseFunc(handler);
|
---|
1042 | res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
|
---|
1043 | if (res != 0)
|
---|
1044 | return(res);
|
---|
1045 | *hout = handler;
|
---|
1046 | return(0);
|
---|
1047 | }
|
---|
1048 |
|
---|
1049 | /**
|
---|
1050 | * xmlSwitchEncoding:
|
---|
1051 | * @ctxt: the parser context
|
---|
1052 | * @enc: the encoding value (number)
|
---|
1053 | *
|
---|
1054 | * Use encoding specified by enum to decode input data. This overrides
|
---|
1055 | * the encoding found in the XML declaration.
|
---|
1056 | *
|
---|
1057 | * This function can also be used to override the encoding of chunks
|
---|
1058 | * passed to xmlParseChunk.
|
---|
1059 | *
|
---|
1060 | * Returns 0 in case of success, -1 otherwise
|
---|
1061 | */
|
---|
1062 | int
|
---|
1063 | xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
---|
1064 | {
|
---|
1065 | xmlCharEncodingHandlerPtr handler = NULL;
|
---|
1066 | int ret;
|
---|
1067 | int res;
|
---|
1068 |
|
---|
1069 | if ((ctxt == NULL) || (ctxt->input == NULL))
|
---|
1070 | return(-1);
|
---|
1071 |
|
---|
1072 | switch (enc) {
|
---|
1073 | case XML_CHAR_ENCODING_NONE:
|
---|
1074 | case XML_CHAR_ENCODING_UTF8:
|
---|
1075 | case XML_CHAR_ENCODING_ASCII:
|
---|
1076 | res = 0;
|
---|
1077 | break;
|
---|
1078 | case XML_CHAR_ENCODING_EBCDIC:
|
---|
1079 | res = xmlDetectEBCDIC(ctxt->input, &handler);
|
---|
1080 | break;
|
---|
1081 | default:
|
---|
1082 | res = xmlLookupCharEncodingHandler(enc, &handler);
|
---|
1083 | break;
|
---|
1084 | }
|
---|
1085 |
|
---|
1086 | if (res != 0) {
|
---|
1087 | const char *name = xmlGetCharEncodingName(enc);
|
---|
1088 |
|
---|
1089 | xmlFatalErr(ctxt, res, (name ? name : "<null>"));
|
---|
1090 | return(-1);
|
---|
1091 | }
|
---|
1092 |
|
---|
1093 | ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
|
---|
1094 |
|
---|
1095 | if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
|
---|
1096 | ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
|
---|
1097 | }
|
---|
1098 |
|
---|
1099 | return(ret);
|
---|
1100 | }
|
---|
1101 |
|
---|
1102 | /**
|
---|
1103 | * xmlSwitchEncodingName:
|
---|
1104 | * @ctxt: the parser context, only for error reporting
|
---|
1105 | * @input: the input strea,
|
---|
1106 | * @encoding: the encoding name
|
---|
1107 | *
|
---|
1108 | * Available since 2.13.0.
|
---|
1109 | *
|
---|
1110 | * Returns 0 in case of success, -1 otherwise
|
---|
1111 | */
|
---|
1112 | static int
|
---|
1113 | xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
---|
1114 | const char *encoding) {
|
---|
1115 | xmlCharEncodingHandlerPtr handler;
|
---|
1116 | int res;
|
---|
1117 |
|
---|
1118 | if (encoding == NULL)
|
---|
1119 | return(-1);
|
---|
1120 |
|
---|
1121 | res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
|
---|
1122 | if (res != 0) {
|
---|
1123 | xmlFatalErr(ctxt, res, encoding);
|
---|
1124 | return(-1);
|
---|
1125 | }
|
---|
1126 |
|
---|
1127 | return(xmlSwitchInputEncoding(ctxt, input, handler));
|
---|
1128 | }
|
---|
1129 |
|
---|
1130 | /**
|
---|
1131 | * xmlSwitchEncodingName:
|
---|
1132 | * @ctxt: the parser context
|
---|
1133 | * @encoding: the encoding name
|
---|
1134 | *
|
---|
1135 | * Use specified encoding to decode input data. This overrides the
|
---|
1136 | * encoding found in the XML declaration.
|
---|
1137 | *
|
---|
1138 | * This function can also be used to override the encoding of chunks
|
---|
1139 | * passed to xmlParseChunk.
|
---|
1140 | *
|
---|
1141 | * Available since 2.13.0.
|
---|
1142 | *
|
---|
1143 | * Returns 0 in case of success, -1 otherwise
|
---|
1144 | */
|
---|
1145 | int
|
---|
1146 | xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
|
---|
1147 | if (ctxt == NULL)
|
---|
1148 | return(-1);
|
---|
1149 |
|
---|
1150 | return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
|
---|
1151 | }
|
---|
1152 |
|
---|
1153 | /**
|
---|
1154 | * xmlSwitchInputEncoding:
|
---|
1155 | * @ctxt: the parser context, only for error reporting
|
---|
1156 | * @input: the input stream
|
---|
1157 | * @handler: the encoding handler
|
---|
1158 | *
|
---|
1159 | * DEPRECATED: Internal function, don't use.
|
---|
1160 | *
|
---|
1161 | * Use encoding handler to decode input data.
|
---|
1162 | *
|
---|
1163 | * Returns 0 in case of success, -1 otherwise
|
---|
1164 | */
|
---|
1165 | int
|
---|
1166 | xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
---|
1167 | xmlCharEncodingHandlerPtr handler)
|
---|
1168 | {
|
---|
1169 | int nbchars;
|
---|
1170 | xmlParserInputBufferPtr in;
|
---|
1171 |
|
---|
1172 | if ((input == NULL) || (input->buf == NULL)) {
|
---|
1173 | xmlCharEncCloseFunc(handler);
|
---|
1174 | return (-1);
|
---|
1175 | }
|
---|
1176 | in = input->buf;
|
---|
1177 |
|
---|
1178 | input->flags |= XML_INPUT_HAS_ENCODING;
|
---|
1179 |
|
---|
1180 | /*
|
---|
1181 | * UTF-8 requires no encoding handler.
|
---|
1182 | */
|
---|
1183 | if ((handler != NULL) &&
|
---|
1184 | (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
|
---|
1185 | xmlCharEncCloseFunc(handler);
|
---|
1186 | handler = NULL;
|
---|
1187 | }
|
---|
1188 |
|
---|
1189 | if (in->encoder == handler)
|
---|
1190 | return (0);
|
---|
1191 |
|
---|
1192 | if (in->encoder != NULL) {
|
---|
1193 | /*
|
---|
1194 | * Switching encodings during parsing is a really bad idea,
|
---|
1195 | * but Chromium can switch between ISO-8859-1 and UTF-16 before
|
---|
1196 | * separate calls to xmlParseChunk.
|
---|
1197 | *
|
---|
1198 | * TODO: We should check whether the "raw" input buffer is empty and
|
---|
1199 | * convert the old content using the old encoder.
|
---|
1200 | */
|
---|
1201 |
|
---|
1202 | xmlCharEncCloseFunc(in->encoder);
|
---|
1203 | in->encoder = handler;
|
---|
1204 | return (0);
|
---|
1205 | }
|
---|
1206 |
|
---|
1207 | in->encoder = handler;
|
---|
1208 |
|
---|
1209 | /*
|
---|
1210 | * Is there already some content down the pipe to convert ?
|
---|
1211 | */
|
---|
1212 | if (xmlBufIsEmpty(in->buffer) == 0) {
|
---|
1213 | xmlBufPtr buf;
|
---|
1214 | size_t processed;
|
---|
1215 |
|
---|
1216 | buf = xmlBufCreate();
|
---|
1217 | if (buf == NULL) {
|
---|
1218 | xmlCtxtErrMemory(ctxt);
|
---|
1219 | return(-1);
|
---|
1220 | }
|
---|
1221 |
|
---|
1222 | /*
|
---|
1223 | * Shrink the current input buffer.
|
---|
1224 | * Move it as the raw buffer and create a new input buffer
|
---|
1225 | */
|
---|
1226 | processed = input->cur - input->base;
|
---|
1227 | xmlBufShrink(in->buffer, processed);
|
---|
1228 | input->consumed += processed;
|
---|
1229 | in->raw = in->buffer;
|
---|
1230 | in->buffer = buf;
|
---|
1231 | in->rawconsumed = processed;
|
---|
1232 |
|
---|
1233 | nbchars = xmlCharEncInput(in);
|
---|
1234 | xmlBufResetInput(in->buffer, input);
|
---|
1235 | if (nbchars == XML_ENC_ERR_MEMORY) {
|
---|
1236 | xmlCtxtErrMemory(ctxt);
|
---|
1237 | } else if (nbchars < 0) {
|
---|
1238 | xmlCtxtErrIO(ctxt, in->error, NULL);
|
---|
1239 | xmlHaltParser(ctxt);
|
---|
1240 | return (-1);
|
---|
1241 | }
|
---|
1242 | }
|
---|
1243 | return (0);
|
---|
1244 | }
|
---|
1245 |
|
---|
1246 | /**
|
---|
1247 | * xmlSwitchToEncoding:
|
---|
1248 | * @ctxt: the parser context
|
---|
1249 | * @handler: the encoding handler
|
---|
1250 | *
|
---|
1251 | * Use encoding handler to decode input data.
|
---|
1252 | *
|
---|
1253 | * This function can be used to enforce the encoding of chunks passed
|
---|
1254 | * to xmlParseChunk.
|
---|
1255 | *
|
---|
1256 | * Returns 0 in case of success, -1 otherwise
|
---|
1257 | */
|
---|
1258 | int
|
---|
1259 | xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
---|
1260 | {
|
---|
1261 | if (ctxt == NULL)
|
---|
1262 | return(-1);
|
---|
1263 | return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
|
---|
1264 | }
|
---|
1265 |
|
---|
1266 | /**
|
---|
1267 | * xmlDetectEncoding:
|
---|
1268 | * @ctxt: the parser context
|
---|
1269 | *
|
---|
1270 | * Handle optional BOM, detect and switch to encoding.
|
---|
1271 | *
|
---|
1272 | * Assumes that there are at least four bytes in the input buffer.
|
---|
1273 | */
|
---|
1274 | void
|
---|
1275 | xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
|
---|
1276 | const xmlChar *in;
|
---|
1277 | xmlCharEncoding enc;
|
---|
1278 | int bomSize;
|
---|
1279 | int autoFlag = 0;
|
---|
1280 |
|
---|
1281 | if (xmlParserGrow(ctxt) < 0)
|
---|
1282 | return;
|
---|
1283 | in = ctxt->input->cur;
|
---|
1284 | if (ctxt->input->end - in < 4)
|
---|
1285 | return;
|
---|
1286 |
|
---|
1287 | if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
---|
1288 | /*
|
---|
1289 | * If the encoding was already set, only skip the BOM which was
|
---|
1290 | * possibly decoded to UTF-8.
|
---|
1291 | */
|
---|
1292 | if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
|
---|
1293 | ctxt->input->cur += 3;
|
---|
1294 | }
|
---|
1295 |
|
---|
1296 | return;
|
---|
1297 | }
|
---|
1298 |
|
---|
1299 | enc = XML_CHAR_ENCODING_NONE;
|
---|
1300 | bomSize = 0;
|
---|
1301 |
|
---|
1302 | switch (in[0]) {
|
---|
1303 | case 0x00:
|
---|
1304 | if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
|
---|
1305 | enc = XML_CHAR_ENCODING_UCS4BE;
|
---|
1306 | autoFlag = XML_INPUT_AUTO_OTHER;
|
---|
1307 | } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
|
---|
1308 | enc = XML_CHAR_ENCODING_UTF16BE;
|
---|
1309 | autoFlag = XML_INPUT_AUTO_UTF16BE;
|
---|
1310 | }
|
---|
1311 | break;
|
---|
1312 |
|
---|
1313 | case 0x3C:
|
---|
1314 | if (in[1] == 0x00) {
|
---|
1315 | if ((in[2] == 0x00) && (in[3] == 0x00)) {
|
---|
1316 | enc = XML_CHAR_ENCODING_UCS4LE;
|
---|
1317 | autoFlag = XML_INPUT_AUTO_OTHER;
|
---|
1318 | } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
|
---|
1319 | enc = XML_CHAR_ENCODING_UTF16LE;
|
---|
1320 | autoFlag = XML_INPUT_AUTO_UTF16LE;
|
---|
1321 | }
|
---|
1322 | }
|
---|
1323 | break;
|
---|
1324 |
|
---|
1325 | case 0x4C:
|
---|
1326 | if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
|
---|
1327 | enc = XML_CHAR_ENCODING_EBCDIC;
|
---|
1328 | autoFlag = XML_INPUT_AUTO_OTHER;
|
---|
1329 | }
|
---|
1330 | break;
|
---|
1331 |
|
---|
1332 | case 0xEF:
|
---|
1333 | if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
|
---|
1334 | enc = XML_CHAR_ENCODING_UTF8;
|
---|
1335 | autoFlag = XML_INPUT_AUTO_UTF8;
|
---|
1336 | bomSize = 3;
|
---|
1337 | }
|
---|
1338 | break;
|
---|
1339 |
|
---|
1340 | case 0xFE:
|
---|
1341 | if (in[1] == 0xFF) {
|
---|
1342 | enc = XML_CHAR_ENCODING_UTF16BE;
|
---|
1343 | autoFlag = XML_INPUT_AUTO_UTF16BE;
|
---|
1344 | bomSize = 2;
|
---|
1345 | }
|
---|
1346 | break;
|
---|
1347 |
|
---|
1348 | case 0xFF:
|
---|
1349 | if (in[1] == 0xFE) {
|
---|
1350 | enc = XML_CHAR_ENCODING_UTF16LE;
|
---|
1351 | autoFlag = XML_INPUT_AUTO_UTF16LE;
|
---|
1352 | bomSize = 2;
|
---|
1353 | }
|
---|
1354 | break;
|
---|
1355 | }
|
---|
1356 |
|
---|
1357 | if (bomSize > 0) {
|
---|
1358 | ctxt->input->cur += bomSize;
|
---|
1359 | }
|
---|
1360 |
|
---|
1361 | if (enc != XML_CHAR_ENCODING_NONE) {
|
---|
1362 | ctxt->input->flags |= autoFlag;
|
---|
1363 | xmlSwitchEncoding(ctxt, enc);
|
---|
1364 | }
|
---|
1365 | }
|
---|
1366 |
|
---|
1367 | /**
|
---|
1368 | * xmlSetDeclaredEncoding:
|
---|
1369 | * @ctxt: the parser context
|
---|
1370 | * @encoding: declared encoding
|
---|
1371 | *
|
---|
1372 | * Set the encoding from a declaration in the document.
|
---|
1373 | *
|
---|
1374 | * If no encoding was set yet, switch the encoding. Otherwise, only warn
|
---|
1375 | * about encoding mismatches.
|
---|
1376 | *
|
---|
1377 | * Takes ownership of 'encoding'.
|
---|
1378 | */
|
---|
1379 | void
|
---|
1380 | xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
|
---|
1381 | if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
|
---|
1382 | ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
|
---|
1383 | xmlSwitchEncodingName(ctxt, (const char *) encoding);
|
---|
1384 | ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
|
---|
1385 | } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
|
---|
1386 | static const char *allowedUTF8[] = {
|
---|
1387 | "UTF-8", "UTF8", NULL
|
---|
1388 | };
|
---|
1389 | static const char *allowedUTF16LE[] = {
|
---|
1390 | "UTF-16", "UTF-16LE", "UTF16", NULL
|
---|
1391 | };
|
---|
1392 | static const char *allowedUTF16BE[] = {
|
---|
1393 | "UTF-16", "UTF-16BE", "UTF16", NULL
|
---|
1394 | };
|
---|
1395 | const char **allowed = NULL;
|
---|
1396 | const char *autoEnc = NULL;
|
---|
1397 |
|
---|
1398 | switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
|
---|
1399 | case XML_INPUT_AUTO_UTF8:
|
---|
1400 | allowed = allowedUTF8;
|
---|
1401 | autoEnc = "UTF-8";
|
---|
1402 | break;
|
---|
1403 | case XML_INPUT_AUTO_UTF16LE:
|
---|
1404 | allowed = allowedUTF16LE;
|
---|
1405 | autoEnc = "UTF-16LE";
|
---|
1406 | break;
|
---|
1407 | case XML_INPUT_AUTO_UTF16BE:
|
---|
1408 | allowed = allowedUTF16BE;
|
---|
1409 | autoEnc = "UTF-16BE";
|
---|
1410 | break;
|
---|
1411 | }
|
---|
1412 |
|
---|
1413 | if (allowed != NULL) {
|
---|
1414 | const char **p;
|
---|
1415 | int match = 0;
|
---|
1416 |
|
---|
1417 | for (p = allowed; *p != NULL; p++) {
|
---|
1418 | if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
|
---|
1419 | match = 1;
|
---|
1420 | break;
|
---|
1421 | }
|
---|
1422 | }
|
---|
1423 |
|
---|
1424 | if (match == 0) {
|
---|
1425 | xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
|
---|
1426 | "Encoding '%s' doesn't match "
|
---|
1427 | "auto-detected '%s'\n",
|
---|
1428 | encoding, BAD_CAST autoEnc);
|
---|
1429 | xmlFree(encoding);
|
---|
1430 | encoding = xmlStrdup(BAD_CAST autoEnc);
|
---|
1431 | if (encoding == NULL)
|
---|
1432 | xmlCtxtErrMemory(ctxt);
|
---|
1433 | }
|
---|
1434 | }
|
---|
1435 | }
|
---|
1436 |
|
---|
1437 | if (ctxt->encoding != NULL)
|
---|
1438 | xmlFree((xmlChar *) ctxt->encoding);
|
---|
1439 | ctxt->encoding = encoding;
|
---|
1440 | }
|
---|
1441 |
|
---|
1442 | /**
|
---|
1443 | * xmlGetActualEncoding:
|
---|
1444 | * @ctxt: the parser context
|
---|
1445 | *
|
---|
1446 | * Returns the actual used to parse the document. This can differ from
|
---|
1447 | * the declared encoding.
|
---|
1448 | */
|
---|
1449 | const xmlChar *
|
---|
1450 | xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
|
---|
1451 | const xmlChar *encoding = NULL;
|
---|
1452 |
|
---|
1453 | if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
|
---|
1454 | (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
|
---|
1455 | /* Preserve encoding exactly */
|
---|
1456 | encoding = ctxt->encoding;
|
---|
1457 | } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
|
---|
1458 | encoding = BAD_CAST ctxt->input->buf->encoder->name;
|
---|
1459 | } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
---|
1460 | encoding = BAD_CAST "UTF-8";
|
---|
1461 | }
|
---|
1462 |
|
---|
1463 | return(encoding);
|
---|
1464 | }
|
---|
1465 |
|
---|
1466 | /************************************************************************
|
---|
1467 | * *
|
---|
1468 | * Commodity functions to handle entities processing *
|
---|
1469 | * *
|
---|
1470 | ************************************************************************/
|
---|
1471 |
|
---|
1472 | /**
|
---|
1473 | * xmlFreeInputStream:
|
---|
1474 | * @input: an xmlParserInputPtr
|
---|
1475 | *
|
---|
1476 | * Free up an input stream.
|
---|
1477 | */
|
---|
1478 | void
|
---|
1479 | xmlFreeInputStream(xmlParserInputPtr input) {
|
---|
1480 | if (input == NULL) return;
|
---|
1481 |
|
---|
1482 | if (input->filename != NULL) xmlFree((char *) input->filename);
|
---|
1483 | if (input->version != NULL) xmlFree((char *) input->version);
|
---|
1484 | if ((input->free != NULL) && (input->base != NULL))
|
---|
1485 | input->free((xmlChar *) input->base);
|
---|
1486 | if (input->buf != NULL)
|
---|
1487 | xmlFreeParserInputBuffer(input->buf);
|
---|
1488 | xmlFree(input);
|
---|
1489 | }
|
---|
1490 |
|
---|
1491 | /**
|
---|
1492 | * xmlNewInputStream:
|
---|
1493 | * @ctxt: an XML parser context
|
---|
1494 | *
|
---|
1495 | * Create a new input stream structure.
|
---|
1496 | *
|
---|
1497 | * Returns the new input stream or NULL
|
---|
1498 | */
|
---|
1499 | xmlParserInputPtr
|
---|
1500 | xmlNewInputStream(xmlParserCtxtPtr ctxt) {
|
---|
1501 | xmlParserInputPtr input;
|
---|
1502 |
|
---|
1503 | input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
|
---|
1504 | if (input == NULL) {
|
---|
1505 | xmlCtxtErrMemory(ctxt);
|
---|
1506 | return(NULL);
|
---|
1507 | }
|
---|
1508 | memset(input, 0, sizeof(xmlParserInput));
|
---|
1509 | input->line = 1;
|
---|
1510 | input->col = 1;
|
---|
1511 |
|
---|
1512 | /*
|
---|
1513 | * If the context is NULL the id cannot be initialized, but that
|
---|
1514 | * should not happen while parsing which is the situation where
|
---|
1515 | * the id is actually needed.
|
---|
1516 | */
|
---|
1517 | if (ctxt != NULL) {
|
---|
1518 | if (input->id >= INT_MAX) {
|
---|
1519 | xmlCtxtErrMemory(ctxt);
|
---|
1520 | return(NULL);
|
---|
1521 | }
|
---|
1522 | input->id = ctxt->input_id++;
|
---|
1523 | }
|
---|
1524 |
|
---|
1525 | return(input);
|
---|
1526 | }
|
---|
1527 |
|
---|
1528 | /**
|
---|
1529 | * xmlNewInputURL:
|
---|
1530 | * @ctxt: parser context
|
---|
1531 | * @url: filename or URL
|
---|
1532 | * @publicId: publid ID from doctype (optional)
|
---|
1533 | * @encoding: character encoding (optional)
|
---|
1534 | * @flags: unused, pass 0
|
---|
1535 | *
|
---|
1536 | * Creates a new parser input from the filesystem, the network or
|
---|
1537 | * a user-defined resource loader.
|
---|
1538 | *
|
---|
1539 | * Returns a new parser input.
|
---|
1540 | */
|
---|
1541 | xmlParserInputPtr
|
---|
1542 | xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
|
---|
1543 | const char *encoding, int flags ATTRIBUTE_UNUSED) {
|
---|
1544 | xmlParserInputPtr input;
|
---|
1545 |
|
---|
1546 | if ((ctxt == NULL) || (url == NULL))
|
---|
1547 | return(NULL);
|
---|
1548 |
|
---|
1549 | input = xmlLoadExternalEntity(url, publicId, ctxt);
|
---|
1550 | if (input == NULL)
|
---|
1551 | return(NULL);
|
---|
1552 |
|
---|
1553 | if (encoding != NULL)
|
---|
1554 | xmlSwitchInputEncodingName(ctxt, input, encoding);
|
---|
1555 |
|
---|
1556 | return(input);
|
---|
1557 | }
|
---|
1558 |
|
---|
1559 | /**
|
---|
1560 | * xmlNewInputInternal:
|
---|
1561 | * @ctxt: parser context
|
---|
1562 | * @buf: parser input buffer
|
---|
1563 | * @filename: filename or URL
|
---|
1564 | * @encoding: character encoding (optional)
|
---|
1565 | *
|
---|
1566 | * Internal helper function.
|
---|
1567 | *
|
---|
1568 | * Returns a new parser input.
|
---|
1569 | */
|
---|
1570 | static xmlParserInputPtr
|
---|
1571 | xmlNewInputInternal(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
|
---|
1572 | const char *filename, const char *encoding) {
|
---|
1573 | xmlParserInputPtr input;
|
---|
1574 |
|
---|
1575 | input = xmlNewInputStream(ctxt);
|
---|
1576 | if (input == NULL) {
|
---|
1577 | xmlFreeParserInputBuffer(buf);
|
---|
1578 | return(NULL);
|
---|
1579 | }
|
---|
1580 |
|
---|
1581 | input->buf = buf;
|
---|
1582 | xmlBufResetInput(input->buf->buffer, input);
|
---|
1583 |
|
---|
1584 | if (filename != NULL) {
|
---|
1585 | input->filename = xmlMemStrdup(filename);
|
---|
1586 | if (input->filename == NULL) {
|
---|
1587 | xmlCtxtErrMemory(ctxt);
|
---|
1588 | xmlFreeInputStream(input);
|
---|
1589 | return(NULL);
|
---|
1590 | }
|
---|
1591 | }
|
---|
1592 |
|
---|
1593 | if (encoding != NULL)
|
---|
1594 | xmlSwitchInputEncodingName(ctxt, input, encoding);
|
---|
1595 |
|
---|
1596 | return(input);
|
---|
1597 | }
|
---|
1598 |
|
---|
1599 | /**
|
---|
1600 | * xmlNewInputMemory:
|
---|
1601 | * @ctxt: parser context
|
---|
1602 | * @url: base URL (optional)
|
---|
1603 | * @mem: pointer to char array
|
---|
1604 | * @size: size of array
|
---|
1605 | * @encoding: character encoding (optional)
|
---|
1606 | * @flags: optimization hints
|
---|
1607 | *
|
---|
1608 | * Creates a new parser input to read from a memory area.
|
---|
1609 | *
|
---|
1610 | * @url is used as base to resolve external entities and for
|
---|
1611 | * error reporting.
|
---|
1612 | *
|
---|
1613 | * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
|
---|
1614 | * stay unchanged until parsing has finished. This can avoid
|
---|
1615 | * temporary copies.
|
---|
1616 | *
|
---|
1617 | * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
|
---|
1618 | * area must contain a zero byte after the buffer at position @size.
|
---|
1619 | * This can avoid temporary copies.
|
---|
1620 | *
|
---|
1621 | * Returns a new parser input.
|
---|
1622 | */
|
---|
1623 | xmlParserInputPtr
|
---|
1624 | xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url,
|
---|
1625 | const void *mem, size_t size,
|
---|
1626 | const char *encoding, int flags) {
|
---|
1627 | xmlParserInputBufferPtr buf;
|
---|
1628 |
|
---|
1629 | if ((ctxt == NULL) || (mem == NULL))
|
---|
1630 | return(NULL);
|
---|
1631 |
|
---|
1632 | buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
|
---|
1633 | if (buf == NULL) {
|
---|
1634 | xmlCtxtErrMemory(ctxt);
|
---|
1635 | return(NULL);
|
---|
1636 | }
|
---|
1637 |
|
---|
1638 | return(xmlNewInputInternal(ctxt, buf, url, encoding));
|
---|
1639 | }
|
---|
1640 |
|
---|
1641 | /**
|
---|
1642 | * xmlNewInputString:
|
---|
1643 | * @ctxt: parser context
|
---|
1644 | * @url: base URL (optional)
|
---|
1645 | * @str: zero-terminated string
|
---|
1646 | * @encoding: character encoding (optional)
|
---|
1647 | * @flags: optimization hints
|
---|
1648 | *
|
---|
1649 | * Creates a new parser input to read from a zero-terminated string.
|
---|
1650 | *
|
---|
1651 | * @url is used as base to resolve external entities and for
|
---|
1652 | * error reporting.
|
---|
1653 | *
|
---|
1654 | * If the XML_INPUT_BUF_STATIC flag is set, the string must
|
---|
1655 | * stay unchanged until parsing has finished. This can avoid
|
---|
1656 | * temporary copies.
|
---|
1657 | *
|
---|
1658 | * Returns a new parser input.
|
---|
1659 | */
|
---|
1660 | xmlParserInputPtr
|
---|
1661 | xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
|
---|
1662 | const char *str, const char *encoding, int flags) {
|
---|
1663 | xmlParserInputBufferPtr buf;
|
---|
1664 |
|
---|
1665 | if ((ctxt == NULL) || (str == NULL))
|
---|
1666 | return(NULL);
|
---|
1667 |
|
---|
1668 | buf = xmlNewInputBufferString(str, flags);
|
---|
1669 | if (buf == NULL) {
|
---|
1670 | xmlCtxtErrMemory(ctxt);
|
---|
1671 | return(NULL);
|
---|
1672 | }
|
---|
1673 |
|
---|
1674 | return(xmlNewInputInternal(ctxt, buf, url, encoding));
|
---|
1675 | }
|
---|
1676 |
|
---|
1677 | /**
|
---|
1678 | * xmlNewInputFd:
|
---|
1679 | * @ctxt: parser context
|
---|
1680 | * @url: base URL (optional)
|
---|
1681 | * @fd: file descriptor
|
---|
1682 | * @encoding: character encoding (optional)
|
---|
1683 | * @flags: unused, pass 0
|
---|
1684 | *
|
---|
1685 | * Creates a new parser input to read from a zero-terminated string.
|
---|
1686 | *
|
---|
1687 | * @url is used as base to resolve external entities and for
|
---|
1688 | * error reporting.
|
---|
1689 | *
|
---|
1690 | * @fd is closed after parsing has finished.
|
---|
1691 | *
|
---|
1692 | * Returns a new parser input.
|
---|
1693 | */
|
---|
1694 | xmlParserInputPtr
|
---|
1695 | xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
|
---|
1696 | int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) {
|
---|
1697 | xmlParserInputBufferPtr buf;
|
---|
1698 |
|
---|
1699 | if ((ctxt == NULL) || (fd < 0))
|
---|
1700 | return(NULL);
|
---|
1701 |
|
---|
1702 | buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
---|
1703 | if (buf == NULL) {
|
---|
1704 | xmlCtxtErrMemory(ctxt);
|
---|
1705 | return(NULL);
|
---|
1706 | }
|
---|
1707 |
|
---|
1708 | return(xmlNewInputInternal(ctxt, buf, url, encoding));
|
---|
1709 | }
|
---|
1710 |
|
---|
1711 | /**
|
---|
1712 | * xmlNewInputIO:
|
---|
1713 | * @ctxt: parser context
|
---|
1714 | * @url: base URL (optional)
|
---|
1715 | * @ioRead: read callback
|
---|
1716 | * @ioClose: close callback (optional)
|
---|
1717 | * @ioCtxt: IO context
|
---|
1718 | * @encoding: character encoding (optional)
|
---|
1719 | * @flags: unused, pass 0
|
---|
1720 | *
|
---|
1721 | * Creates a new parser input to read from input callbacks and
|
---|
1722 | * cintext.
|
---|
1723 | *
|
---|
1724 | * @url is used as base to resolve external entities and for
|
---|
1725 | * error reporting.
|
---|
1726 | *
|
---|
1727 | * @ioRead is called to read new data into a provided buffer.
|
---|
1728 | * It must return the number of bytes written into the buffer
|
---|
1729 | * ot a negative xmlParserErrors code on failure.
|
---|
1730 | *
|
---|
1731 | * @ioClose is called after parsing has finished.
|
---|
1732 | *
|
---|
1733 | * @ioCtxt is an opaque pointer passed to the callbacks.
|
---|
1734 | *
|
---|
1735 | * Returns a new parser input.
|
---|
1736 | */
|
---|
1737 | xmlParserInputPtr
|
---|
1738 | xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url,
|
---|
1739 | xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose,
|
---|
1740 | void *ioCtxt,
|
---|
1741 | const char *encoding, int flags ATTRIBUTE_UNUSED) {
|
---|
1742 | xmlParserInputBufferPtr buf;
|
---|
1743 |
|
---|
1744 | if ((ctxt == NULL) || (ioRead == NULL))
|
---|
1745 | return(NULL);
|
---|
1746 |
|
---|
1747 | buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
|
---|
1748 | if (buf == NULL) {
|
---|
1749 | xmlCtxtErrMemory(ctxt);
|
---|
1750 | if (ioClose != NULL)
|
---|
1751 | ioClose(ioCtxt);
|
---|
1752 | return(NULL);
|
---|
1753 | }
|
---|
1754 |
|
---|
1755 | buf->context = ioCtxt;
|
---|
1756 | buf->readcallback = ioRead;
|
---|
1757 | buf->closecallback = ioClose;
|
---|
1758 |
|
---|
1759 | return(xmlNewInputInternal(ctxt, buf, url, encoding));
|
---|
1760 | }
|
---|
1761 |
|
---|
1762 | /**
|
---|
1763 | * xmlNewInputPush:
|
---|
1764 | * @ctxt: parser context
|
---|
1765 | * @url: base URL (optional)
|
---|
1766 | * @chunk: pointer to char array
|
---|
1767 | * @size: size of array
|
---|
1768 | * @encoding: character encoding (optional)
|
---|
1769 | *
|
---|
1770 | * Creates a new parser input for a push parser.
|
---|
1771 | *
|
---|
1772 | * Returns a new parser input.
|
---|
1773 | */
|
---|
1774 | xmlParserInputPtr
|
---|
1775 | xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
|
---|
1776 | const char *chunk, int size, const char *encoding) {
|
---|
1777 | xmlParserInputBufferPtr buf;
|
---|
1778 | xmlParserInputPtr input;
|
---|
1779 |
|
---|
1780 | buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
|
---|
1781 | if (buf == NULL) {
|
---|
1782 | xmlCtxtErrMemory(ctxt);
|
---|
1783 | return(NULL);
|
---|
1784 | }
|
---|
1785 |
|
---|
1786 | input = xmlNewInputInternal(ctxt, buf, url, encoding);
|
---|
1787 | if (input == NULL)
|
---|
1788 | return(NULL);
|
---|
1789 |
|
---|
1790 | input->flags |= XML_INPUT_PROGRESSIVE;
|
---|
1791 |
|
---|
1792 | if ((size > 0) && (chunk != NULL)) {
|
---|
1793 | int res;
|
---|
1794 |
|
---|
1795 | res = xmlParserInputBufferPush(input->buf, size, chunk);
|
---|
1796 | xmlBufResetInput(input->buf->buffer, input);
|
---|
1797 | if (res < 0) {
|
---|
1798 | xmlCtxtErrIO(ctxt, input->buf->error, NULL);
|
---|
1799 | xmlFreeInputStream(input);
|
---|
1800 | return(NULL);
|
---|
1801 | }
|
---|
1802 | }
|
---|
1803 |
|
---|
1804 | return(input);
|
---|
1805 | }
|
---|
1806 |
|
---|
1807 | /**
|
---|
1808 | * xmlNewIOInputStream:
|
---|
1809 | * @ctxt: an XML parser context
|
---|
1810 | * @buf: an input buffer
|
---|
1811 | * @enc: the charset encoding if known
|
---|
1812 | *
|
---|
1813 | * Create a new input stream structure encapsulating the @input into
|
---|
1814 | * a stream suitable for the parser.
|
---|
1815 | *
|
---|
1816 | * Returns the new input stream or NULL
|
---|
1817 | */
|
---|
1818 | xmlParserInputPtr
|
---|
1819 | xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
|
---|
1820 | xmlCharEncoding enc) {
|
---|
1821 | const char *encoding;
|
---|
1822 |
|
---|
1823 | if (buf == NULL)
|
---|
1824 | return(NULL);
|
---|
1825 |
|
---|
1826 | encoding = xmlGetCharEncodingName(enc);
|
---|
1827 | return(xmlNewInputInternal(ctxt, buf, NULL, encoding));
|
---|
1828 | }
|
---|
1829 |
|
---|
1830 | /**
|
---|
1831 | * xmlNewEntityInputStream:
|
---|
1832 | * @ctxt: an XML parser context
|
---|
1833 | * @ent: an Entity pointer
|
---|
1834 | *
|
---|
1835 | * DEPRECATED: Internal function, do not use.
|
---|
1836 | *
|
---|
1837 | * Create a new input stream based on an xmlEntityPtr
|
---|
1838 | *
|
---|
1839 | * Returns the new input stream or NULL
|
---|
1840 | */
|
---|
1841 | xmlParserInputPtr
|
---|
1842 | xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
|
---|
1843 | xmlParserInputPtr input;
|
---|
1844 |
|
---|
1845 | if ((ctxt == NULL) || (ent == NULL))
|
---|
1846 | return(NULL);
|
---|
1847 |
|
---|
1848 | if (ent->content != NULL) {
|
---|
1849 | input = xmlNewInputString(ctxt, NULL, (const char *) ent->content,
|
---|
1850 | NULL, XML_INPUT_BUF_STATIC);
|
---|
1851 | } else if (ent->URI != NULL) {
|
---|
1852 | input = xmlLoadExternalEntity((char *) ent->URI,
|
---|
1853 | (char *) ent->ExternalID, ctxt);
|
---|
1854 | } else {
|
---|
1855 | return(NULL);
|
---|
1856 | }
|
---|
1857 |
|
---|
1858 | if (input == NULL)
|
---|
1859 | return(NULL);
|
---|
1860 |
|
---|
1861 | input->entity = ent;
|
---|
1862 |
|
---|
1863 | return(input);
|
---|
1864 | }
|
---|
1865 |
|
---|
1866 | /**
|
---|
1867 | * xmlNewStringInputStream:
|
---|
1868 | * @ctxt: an XML parser context
|
---|
1869 | * @buffer: an memory buffer
|
---|
1870 | *
|
---|
1871 | * Create a new input stream based on a memory buffer.
|
---|
1872 | *
|
---|
1873 | * Returns the new input stream
|
---|
1874 | */
|
---|
1875 | xmlParserInputPtr
|
---|
1876 | xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
|
---|
1877 | return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0));
|
---|
1878 | }
|
---|
1879 |
|
---|
1880 |
|
---|
1881 | /****************************************************************
|
---|
1882 | * *
|
---|
1883 | * External entities loading *
|
---|
1884 | * *
|
---|
1885 | ****************************************************************/
|
---|
1886 |
|
---|
1887 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
1888 |
|
---|
1889 | /**
|
---|
1890 | * xmlResolveResourceFromCatalog:
|
---|
1891 | * @URL: the URL for the entity to load
|
---|
1892 | * @ID: the System ID for the entity to load
|
---|
1893 | * @ctxt: the context in which the entity is called or NULL
|
---|
1894 | *
|
---|
1895 | * Resolves the URL and ID against the appropriate catalog.
|
---|
1896 | * This function is used by xmlDefaultExternalEntityLoader and
|
---|
1897 | * xmlNoNetExternalEntityLoader.
|
---|
1898 | *
|
---|
1899 | * Returns a new allocated URL, or NULL.
|
---|
1900 | */
|
---|
1901 | static xmlChar *
|
---|
1902 | xmlResolveResourceFromCatalog(const char *URL, const char *ID,
|
---|
1903 | xmlParserCtxtPtr ctxt) {
|
---|
1904 | xmlChar *resource = NULL;
|
---|
1905 | xmlCatalogAllow pref;
|
---|
1906 |
|
---|
1907 | /*
|
---|
1908 | * If the resource doesn't exists as a file,
|
---|
1909 | * try to load it from the resource pointed in the catalogs
|
---|
1910 | */
|
---|
1911 | pref = xmlCatalogGetDefaults();
|
---|
1912 |
|
---|
1913 | if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
|
---|
1914 | /*
|
---|
1915 | * Do a local lookup
|
---|
1916 | */
|
---|
1917 | if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
|
---|
1918 | ((pref == XML_CATA_ALLOW_ALL) ||
|
---|
1919 | (pref == XML_CATA_ALLOW_DOCUMENT))) {
|
---|
1920 | resource = xmlCatalogLocalResolve(ctxt->catalogs,
|
---|
1921 | (const xmlChar *)ID,
|
---|
1922 | (const xmlChar *)URL);
|
---|
1923 | }
|
---|
1924 | /*
|
---|
1925 | * Try a global lookup
|
---|
1926 | */
|
---|
1927 | if ((resource == NULL) &&
|
---|
1928 | ((pref == XML_CATA_ALLOW_ALL) ||
|
---|
1929 | (pref == XML_CATA_ALLOW_GLOBAL))) {
|
---|
1930 | resource = xmlCatalogResolve((const xmlChar *)ID,
|
---|
1931 | (const xmlChar *)URL);
|
---|
1932 | }
|
---|
1933 | if ((resource == NULL) && (URL != NULL))
|
---|
1934 | resource = xmlStrdup((const xmlChar *) URL);
|
---|
1935 |
|
---|
1936 | /*
|
---|
1937 | * TODO: do an URI lookup on the reference
|
---|
1938 | */
|
---|
1939 | if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
|
---|
1940 | xmlChar *tmp = NULL;
|
---|
1941 |
|
---|
1942 | if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
|
---|
1943 | ((pref == XML_CATA_ALLOW_ALL) ||
|
---|
1944 | (pref == XML_CATA_ALLOW_DOCUMENT))) {
|
---|
1945 | tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
|
---|
1946 | }
|
---|
1947 | if ((tmp == NULL) &&
|
---|
1948 | ((pref == XML_CATA_ALLOW_ALL) ||
|
---|
1949 | (pref == XML_CATA_ALLOW_GLOBAL))) {
|
---|
1950 | tmp = xmlCatalogResolveURI(resource);
|
---|
1951 | }
|
---|
1952 |
|
---|
1953 | if (tmp != NULL) {
|
---|
1954 | xmlFree(resource);
|
---|
1955 | resource = tmp;
|
---|
1956 | }
|
---|
1957 | }
|
---|
1958 | }
|
---|
1959 |
|
---|
1960 | return resource;
|
---|
1961 | }
|
---|
1962 |
|
---|
1963 | #endif
|
---|
1964 |
|
---|
1965 | /**
|
---|
1966 | * xmlCheckHTTPInput:
|
---|
1967 | * @ctxt: an XML parser context
|
---|
1968 | * @ret: an XML parser input
|
---|
1969 | *
|
---|
1970 | * DEPRECATED: Internal function, don't use.
|
---|
1971 | *
|
---|
1972 | * Check an input in case it was created from an HTTP stream, in that
|
---|
1973 | * case it will handle encoding and update of the base URL in case of
|
---|
1974 | * redirection. It also checks for HTTP errors in which case the input
|
---|
1975 | * is cleanly freed up and an appropriate error is raised in context
|
---|
1976 | *
|
---|
1977 | * Returns the input or NULL in case of HTTP error.
|
---|
1978 | */
|
---|
1979 | xmlParserInputPtr
|
---|
1980 | xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
|
---|
1981 | /* Avoid unused variable warning if features are disabled. */
|
---|
1982 | (void) ctxt;
|
---|
1983 |
|
---|
1984 | #ifdef LIBXML_HTTP_ENABLED
|
---|
1985 | if ((ret != NULL) && (ret->buf != NULL) &&
|
---|
1986 | (ret->buf->readcallback == xmlIOHTTPRead) &&
|
---|
1987 | (ret->buf->context != NULL)) {
|
---|
1988 | const char *encoding;
|
---|
1989 | const char *redir;
|
---|
1990 | const char *mime;
|
---|
1991 | int code;
|
---|
1992 |
|
---|
1993 | code = xmlNanoHTTPReturnCode(ret->buf->context);
|
---|
1994 | if (code >= 400) {
|
---|
1995 | /* fatal error */
|
---|
1996 | if (ret->filename != NULL)
|
---|
1997 | xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
|
---|
1998 | else
|
---|
1999 | xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
|
---|
2000 | xmlFreeInputStream(ret);
|
---|
2001 | ret = NULL;
|
---|
2002 | } else {
|
---|
2003 |
|
---|
2004 | mime = xmlNanoHTTPMimeType(ret->buf->context);
|
---|
2005 | if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
|
---|
2006 | (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
|
---|
2007 | encoding = xmlNanoHTTPEncoding(ret->buf->context);
|
---|
2008 | if (encoding != NULL)
|
---|
2009 | xmlSwitchEncodingName(ctxt, encoding);
|
---|
2010 | #if 0
|
---|
2011 | } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
|
---|
2012 | #endif
|
---|
2013 | }
|
---|
2014 | redir = xmlNanoHTTPRedir(ret->buf->context);
|
---|
2015 | if (redir != NULL) {
|
---|
2016 | if (ret->filename != NULL)
|
---|
2017 | xmlFree((xmlChar *) ret->filename);
|
---|
2018 | ret->filename =
|
---|
2019 | (char *) xmlStrdup((const xmlChar *) redir);
|
---|
2020 | }
|
---|
2021 | }
|
---|
2022 | }
|
---|
2023 | #endif
|
---|
2024 | return(ret);
|
---|
2025 | }
|
---|
2026 |
|
---|
2027 | /**
|
---|
2028 | * xmlNewInputFromFile:
|
---|
2029 | * @ctxt: an XML parser context
|
---|
2030 | * @filename: the filename to use as entity
|
---|
2031 | *
|
---|
2032 | * Create a new input stream based on a file or an URL.
|
---|
2033 | *
|
---|
2034 | * Returns the new input stream or NULL in case of error
|
---|
2035 | */
|
---|
2036 | xmlParserInputPtr
|
---|
2037 | xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
---|
2038 | xmlParserInputBufferPtr buf;
|
---|
2039 | xmlParserInputPtr inputStream;
|
---|
2040 | const xmlChar *URI;
|
---|
2041 | xmlChar *canonic;
|
---|
2042 | int code;
|
---|
2043 |
|
---|
2044 | if ((ctxt == NULL) || (filename == NULL))
|
---|
2045 | return(NULL);
|
---|
2046 |
|
---|
2047 | code = xmlParserInputBufferCreateFilenameSafe(filename,
|
---|
2048 | XML_CHAR_ENCODING_NONE, &buf);
|
---|
2049 | if (buf == NULL) {
|
---|
2050 | xmlCtxtErrIO(ctxt, code, filename);
|
---|
2051 | return(NULL);
|
---|
2052 | }
|
---|
2053 |
|
---|
2054 | inputStream = xmlNewInputStream(ctxt);
|
---|
2055 | if (inputStream == NULL) {
|
---|
2056 | xmlFreeParserInputBuffer(buf);
|
---|
2057 | return(NULL);
|
---|
2058 | }
|
---|
2059 |
|
---|
2060 | inputStream->buf = buf;
|
---|
2061 | inputStream = xmlCheckHTTPInput(ctxt, inputStream);
|
---|
2062 | if (inputStream == NULL)
|
---|
2063 | return(NULL);
|
---|
2064 |
|
---|
2065 | if (inputStream->filename == NULL)
|
---|
2066 | URI = (xmlChar *) filename;
|
---|
2067 | else
|
---|
2068 | URI = (xmlChar *) inputStream->filename;
|
---|
2069 | canonic = xmlCanonicPath(URI);
|
---|
2070 | if (canonic == NULL) {
|
---|
2071 | xmlCtxtErrMemory(ctxt);
|
---|
2072 | xmlFreeInputStream(inputStream);
|
---|
2073 | return(NULL);
|
---|
2074 | }
|
---|
2075 | if (inputStream->filename != NULL)
|
---|
2076 | xmlFree((char *) inputStream->filename);
|
---|
2077 | inputStream->filename = (char *) canonic;
|
---|
2078 |
|
---|
2079 | xmlBufResetInput(inputStream->buf->buffer, inputStream);
|
---|
2080 |
|
---|
2081 | return(inputStream);
|
---|
2082 | }
|
---|
2083 |
|
---|
2084 | /**
|
---|
2085 | * xmlDefaultExternalEntityLoader:
|
---|
2086 | * @URL: the URL for the entity to load
|
---|
2087 | * @ID: the System ID for the entity to load
|
---|
2088 | * @ctxt: the context in which the entity is called or NULL
|
---|
2089 | *
|
---|
2090 | * By default we don't load external entities, yet.
|
---|
2091 | *
|
---|
2092 | * Returns a new allocated xmlParserInputPtr, or NULL.
|
---|
2093 | */
|
---|
2094 | static xmlParserInputPtr
|
---|
2095 | xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
|
---|
2096 | xmlParserCtxtPtr ctxt)
|
---|
2097 | {
|
---|
2098 | xmlParserInputPtr ret = NULL;
|
---|
2099 | xmlChar *resource = NULL;
|
---|
2100 |
|
---|
2101 | if (URL == NULL)
|
---|
2102 | return(NULL);
|
---|
2103 |
|
---|
2104 | if ((ctxt != NULL) && (ctxt->options & XML_PARSE_NONET)) {
|
---|
2105 | int options = ctxt->options;
|
---|
2106 |
|
---|
2107 | ctxt->options -= XML_PARSE_NONET;
|
---|
2108 | ret = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
|
---|
2109 | ctxt->options = options;
|
---|
2110 | return(ret);
|
---|
2111 | }
|
---|
2112 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
2113 | resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
|
---|
2114 | #endif
|
---|
2115 |
|
---|
2116 | if (resource == NULL)
|
---|
2117 | resource = (xmlChar *) URL;
|
---|
2118 |
|
---|
2119 | ret = xmlNewInputFromFile(ctxt, (const char *) resource);
|
---|
2120 | if ((resource != NULL) && (resource != (xmlChar *) URL))
|
---|
2121 | xmlFree(resource);
|
---|
2122 | return (ret);
|
---|
2123 | }
|
---|
2124 |
|
---|
2125 | /**
|
---|
2126 | * xmlNoNetExternalEntityLoader:
|
---|
2127 | * @URL: the URL for the entity to load
|
---|
2128 | * @ID: the System ID for the entity to load
|
---|
2129 | * @ctxt: the context in which the entity is called or NULL
|
---|
2130 | *
|
---|
2131 | * A specific entity loader disabling network accesses, though still
|
---|
2132 | * allowing local catalog accesses for resolution.
|
---|
2133 | *
|
---|
2134 | * Returns a new allocated xmlParserInputPtr, or NULL.
|
---|
2135 | */
|
---|
2136 | xmlParserInputPtr
|
---|
2137 | xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
|
---|
2138 | xmlParserCtxtPtr ctxt) {
|
---|
2139 | xmlParserInputPtr input = NULL;
|
---|
2140 | xmlChar *resource = NULL;
|
---|
2141 |
|
---|
2142 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
2143 | resource = xmlResolveResourceFromCatalog(URL, ID, ctxt);
|
---|
2144 | #endif
|
---|
2145 |
|
---|
2146 | if (resource == NULL)
|
---|
2147 | resource = (xmlChar *) URL;
|
---|
2148 |
|
---|
2149 | if (resource != NULL) {
|
---|
2150 | if ((!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "ftp://", 6)) ||
|
---|
2151 | (!xmlStrncasecmp(BAD_CAST resource, BAD_CAST "http://", 7))) {
|
---|
2152 | xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT,
|
---|
2153 | (const char *) resource);
|
---|
2154 | /*
|
---|
2155 | * Also forward the error directly to the global error
|
---|
2156 | * handler, which the XML::LibXML test suite expects.
|
---|
2157 | */
|
---|
2158 | __xmlIOErr(XML_FROM_IO, XML_IO_NETWORK_ATTEMPT,
|
---|
2159 | (const char *) resource);
|
---|
2160 | if (resource != (xmlChar *) URL)
|
---|
2161 | xmlFree(resource);
|
---|
2162 | return(NULL);
|
---|
2163 | }
|
---|
2164 | }
|
---|
2165 | input = xmlDefaultExternalEntityLoader((const char *) resource, ID, ctxt);
|
---|
2166 | if (resource != (xmlChar *) URL)
|
---|
2167 | xmlFree(resource);
|
---|
2168 | return(input);
|
---|
2169 | }
|
---|
2170 |
|
---|
2171 | /*
|
---|
2172 | * This global has to die eventually
|
---|
2173 | */
|
---|
2174 | static xmlExternalEntityLoader
|
---|
2175 | xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
|
---|
2176 |
|
---|
2177 | /**
|
---|
2178 | * xmlSetExternalEntityLoader:
|
---|
2179 | * @f: the new entity resolver function
|
---|
2180 | *
|
---|
2181 | * Changes the defaultexternal entity resolver function for the application
|
---|
2182 | */
|
---|
2183 | void
|
---|
2184 | xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
|
---|
2185 | xmlCurrentExternalEntityLoader = f;
|
---|
2186 | }
|
---|
2187 |
|
---|
2188 | /**
|
---|
2189 | * xmlGetExternalEntityLoader:
|
---|
2190 | *
|
---|
2191 | * Get the default external entity resolver function for the application
|
---|
2192 | *
|
---|
2193 | * Returns the xmlExternalEntityLoader function pointer
|
---|
2194 | */
|
---|
2195 | xmlExternalEntityLoader
|
---|
2196 | xmlGetExternalEntityLoader(void) {
|
---|
2197 | return(xmlCurrentExternalEntityLoader);
|
---|
2198 | }
|
---|
2199 |
|
---|
2200 | /**
|
---|
2201 | * xmlLoadExternalEntity:
|
---|
2202 | * @URL: the URL for the entity to load
|
---|
2203 | * @ID: the Public ID for the entity to load
|
---|
2204 | * @ctxt: the context in which the entity is called or NULL
|
---|
2205 | *
|
---|
2206 | * @URL is a filename or URL. If if contains the substring "://",
|
---|
2207 | * it is assumed to be a Legacy Extended IRI. Otherwise, it is
|
---|
2208 | * treated as a filesystem path.
|
---|
2209 | *
|
---|
2210 | * @ID is an optional XML public ID, typically from a doctype
|
---|
2211 | * declaration. It is used for catalog lookups.
|
---|
2212 | *
|
---|
2213 | * The following resource loaders will be called if they were
|
---|
2214 | * registered (in order of precedence):
|
---|
2215 | *
|
---|
2216 | * - the global external entity loader set with
|
---|
2217 | * xmlSetExternalEntityLoader
|
---|
2218 | * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
|
---|
2219 | * xmlParserInputBufferCreateFilenameDefault
|
---|
2220 | * - the default loader which will return
|
---|
2221 | * - the result from a matching global input callback set with
|
---|
2222 | * xmlRegisterInputCallbacks
|
---|
2223 | * - a HTTP resource if support is compiled in.
|
---|
2224 | * - a file opened from the filesystem, with automatic detection
|
---|
2225 | * of compressed files if support is compiled in.
|
---|
2226 | *
|
---|
2227 | * Returns the xmlParserInputPtr or NULL
|
---|
2228 | */
|
---|
2229 | xmlParserInputPtr
|
---|
2230 | xmlLoadExternalEntity(const char *URL, const char *ID,
|
---|
2231 | xmlParserCtxtPtr ctxt) {
|
---|
2232 | char *canonicFilename;
|
---|
2233 | xmlParserInputPtr ret;
|
---|
2234 |
|
---|
2235 | if (URL == NULL)
|
---|
2236 | return(NULL);
|
---|
2237 |
|
---|
2238 | canonicFilename = (char *) xmlCanonicPath((const xmlChar *) URL);
|
---|
2239 | if (canonicFilename == NULL) {
|
---|
2240 | xmlCtxtErrMemory(ctxt);
|
---|
2241 | return(NULL);
|
---|
2242 | }
|
---|
2243 |
|
---|
2244 | ret = xmlCurrentExternalEntityLoader(canonicFilename, ID, ctxt);
|
---|
2245 | xmlFree(canonicFilename);
|
---|
2246 | return(ret);
|
---|
2247 | }
|
---|
2248 |
|
---|
2249 | /************************************************************************
|
---|
2250 | * *
|
---|
2251 | * Commodity functions to handle parser contexts *
|
---|
2252 | * *
|
---|
2253 | ************************************************************************/
|
---|
2254 |
|
---|
2255 | /**
|
---|
2256 | * xmlInitSAXParserCtxt:
|
---|
2257 | * @ctxt: XML parser context
|
---|
2258 | * @sax: SAX handlert
|
---|
2259 | * @userData: user data
|
---|
2260 | *
|
---|
2261 | * Initialize a SAX parser context
|
---|
2262 | *
|
---|
2263 | * Returns 0 in case of success and -1 in case of error
|
---|
2264 | */
|
---|
2265 |
|
---|
2266 | static int
|
---|
2267 | xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
|
---|
2268 | void *userData)
|
---|
2269 | {
|
---|
2270 | xmlParserInputPtr input;
|
---|
2271 |
|
---|
2272 | if (ctxt == NULL)
|
---|
2273 | return(-1);
|
---|
2274 |
|
---|
2275 | if (ctxt->dict == NULL)
|
---|
2276 | ctxt->dict = xmlDictCreate();
|
---|
2277 | if (ctxt->dict == NULL)
|
---|
2278 | return(-1);
|
---|
2279 | xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
|
---|
2280 |
|
---|
2281 | if (ctxt->sax == NULL)
|
---|
2282 | ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
|
---|
2283 | if (ctxt->sax == NULL)
|
---|
2284 | return(-1);
|
---|
2285 | if (sax == NULL) {
|
---|
2286 | memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
|
---|
2287 | xmlSAXVersion(ctxt->sax, 2);
|
---|
2288 | ctxt->userData = ctxt;
|
---|
2289 | } else {
|
---|
2290 | if (sax->initialized == XML_SAX2_MAGIC) {
|
---|
2291 | memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
|
---|
2292 | } else {
|
---|
2293 | memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
|
---|
2294 | memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
|
---|
2295 | }
|
---|
2296 | ctxt->userData = userData ? userData : ctxt;
|
---|
2297 | }
|
---|
2298 |
|
---|
2299 | ctxt->maxatts = 0;
|
---|
2300 | ctxt->atts = NULL;
|
---|
2301 | /* Allocate the Input stack */
|
---|
2302 | if (ctxt->inputTab == NULL) {
|
---|
2303 | ctxt->inputTab = (xmlParserInputPtr *)
|
---|
2304 | xmlMalloc(5 * sizeof(xmlParserInputPtr));
|
---|
2305 | ctxt->inputMax = 5;
|
---|
2306 | }
|
---|
2307 | if (ctxt->inputTab == NULL)
|
---|
2308 | return(-1);
|
---|
2309 | while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
|
---|
2310 | xmlFreeInputStream(input);
|
---|
2311 | }
|
---|
2312 | ctxt->inputNr = 0;
|
---|
2313 | ctxt->input = NULL;
|
---|
2314 |
|
---|
2315 | ctxt->version = NULL;
|
---|
2316 | ctxt->encoding = NULL;
|
---|
2317 | ctxt->standalone = -1;
|
---|
2318 | ctxt->hasExternalSubset = 0;
|
---|
2319 | ctxt->hasPErefs = 0;
|
---|
2320 | ctxt->html = 0;
|
---|
2321 | ctxt->instate = XML_PARSER_START;
|
---|
2322 |
|
---|
2323 | /* Allocate the Node stack */
|
---|
2324 | if (ctxt->nodeTab == NULL) {
|
---|
2325 | ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
|
---|
2326 | ctxt->nodeMax = 10;
|
---|
2327 | }
|
---|
2328 | if (ctxt->nodeTab == NULL)
|
---|
2329 | return(-1);
|
---|
2330 | ctxt->nodeNr = 0;
|
---|
2331 | ctxt->node = NULL;
|
---|
2332 |
|
---|
2333 | /* Allocate the Name stack */
|
---|
2334 | if (ctxt->nameTab == NULL) {
|
---|
2335 | ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
|
---|
2336 | ctxt->nameMax = 10;
|
---|
2337 | }
|
---|
2338 | if (ctxt->nameTab == NULL)
|
---|
2339 | return(-1);
|
---|
2340 | ctxt->nameNr = 0;
|
---|
2341 | ctxt->name = NULL;
|
---|
2342 |
|
---|
2343 | /* Allocate the space stack */
|
---|
2344 | if (ctxt->spaceTab == NULL) {
|
---|
2345 | ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
|
---|
2346 | ctxt->spaceMax = 10;
|
---|
2347 | }
|
---|
2348 | if (ctxt->spaceTab == NULL)
|
---|
2349 | return(-1);
|
---|
2350 | ctxt->spaceNr = 1;
|
---|
2351 | ctxt->spaceMax = 10;
|
---|
2352 | ctxt->spaceTab[0] = -1;
|
---|
2353 | ctxt->space = &ctxt->spaceTab[0];
|
---|
2354 | ctxt->myDoc = NULL;
|
---|
2355 | ctxt->wellFormed = 1;
|
---|
2356 | ctxt->nsWellFormed = 1;
|
---|
2357 | ctxt->valid = 1;
|
---|
2358 |
|
---|
2359 | ctxt->options = XML_PARSE_NODICT;
|
---|
2360 |
|
---|
2361 | /*
|
---|
2362 | * Initialize some parser options from deprecated global variables.
|
---|
2363 | * Note that the "modern" API taking options arguments or
|
---|
2364 | * xmlCtxtSetOptions will ignore these defaults. They're only
|
---|
2365 | * relevant if old API functions like xmlParseFile are used.
|
---|
2366 | */
|
---|
2367 | ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
|
---|
2368 | if (ctxt->loadsubset) {
|
---|
2369 | ctxt->options |= XML_PARSE_DTDLOAD;
|
---|
2370 | }
|
---|
2371 | ctxt->validate = xmlDoValidityCheckingDefaultValue;
|
---|
2372 | if (ctxt->validate) {
|
---|
2373 | ctxt->options |= XML_PARSE_DTDVALID;
|
---|
2374 | }
|
---|
2375 | ctxt->pedantic = xmlPedanticParserDefaultValue;
|
---|
2376 | if (ctxt->pedantic) {
|
---|
2377 | ctxt->options |= XML_PARSE_PEDANTIC;
|
---|
2378 | }
|
---|
2379 | ctxt->linenumbers = xmlLineNumbersDefaultValue;
|
---|
2380 | ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
|
---|
2381 | if (ctxt->keepBlanks == 0) {
|
---|
2382 | ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
---|
2383 | ctxt->options |= XML_PARSE_NOBLANKS;
|
---|
2384 | }
|
---|
2385 | ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
|
---|
2386 | if (ctxt->replaceEntities) {
|
---|
2387 | ctxt->options |= XML_PARSE_NOENT;
|
---|
2388 | }
|
---|
2389 | if (xmlGetWarningsDefaultValue == 0)
|
---|
2390 | ctxt->options |= XML_PARSE_NOWARNING;
|
---|
2391 |
|
---|
2392 | ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
|
---|
2393 | ctxt->vctxt.userData = ctxt;
|
---|
2394 | ctxt->vctxt.error = xmlParserValidityError;
|
---|
2395 | ctxt->vctxt.warning = xmlParserValidityWarning;
|
---|
2396 |
|
---|
2397 | ctxt->record_info = 0;
|
---|
2398 | ctxt->checkIndex = 0;
|
---|
2399 | ctxt->inSubset = 0;
|
---|
2400 | ctxt->errNo = XML_ERR_OK;
|
---|
2401 | ctxt->depth = 0;
|
---|
2402 | ctxt->catalogs = NULL;
|
---|
2403 | ctxt->sizeentities = 0;
|
---|
2404 | ctxt->sizeentcopy = 0;
|
---|
2405 | ctxt->input_id = 1;
|
---|
2406 | ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
|
---|
2407 | xmlInitNodeInfoSeq(&ctxt->node_seq);
|
---|
2408 |
|
---|
2409 | if (ctxt->nsdb == NULL) {
|
---|
2410 | ctxt->nsdb = xmlParserNsCreate();
|
---|
2411 | if (ctxt->nsdb == NULL) {
|
---|
2412 | xmlCtxtErrMemory(ctxt);
|
---|
2413 | return(-1);
|
---|
2414 | }
|
---|
2415 | }
|
---|
2416 |
|
---|
2417 | return(0);
|
---|
2418 | }
|
---|
2419 |
|
---|
2420 | /**
|
---|
2421 | * xmlInitParserCtxt:
|
---|
2422 | * @ctxt: an XML parser context
|
---|
2423 | *
|
---|
2424 | * DEPRECATED: Internal function which will be made private in a future
|
---|
2425 | * version.
|
---|
2426 | *
|
---|
2427 | * Initialize a parser context
|
---|
2428 | *
|
---|
2429 | * Returns 0 in case of success and -1 in case of error
|
---|
2430 | */
|
---|
2431 |
|
---|
2432 | int
|
---|
2433 | xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
|
---|
2434 | {
|
---|
2435 | return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
|
---|
2436 | }
|
---|
2437 |
|
---|
2438 | /**
|
---|
2439 | * xmlFreeParserCtxt:
|
---|
2440 | * @ctxt: an XML parser context
|
---|
2441 | *
|
---|
2442 | * Free all the memory used by a parser context. However the parsed
|
---|
2443 | * document in ctxt->myDoc is not freed.
|
---|
2444 | */
|
---|
2445 |
|
---|
2446 | void
|
---|
2447 | xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
|
---|
2448 | {
|
---|
2449 | xmlParserInputPtr input;
|
---|
2450 |
|
---|
2451 | if (ctxt == NULL) return;
|
---|
2452 |
|
---|
2453 | while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
|
---|
2454 | xmlFreeInputStream(input);
|
---|
2455 | }
|
---|
2456 | if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
|
---|
2457 | if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
|
---|
2458 | if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
|
---|
2459 | if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
|
---|
2460 | if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
|
---|
2461 | if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
|
---|
2462 | if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
|
---|
2463 | if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
|
---|
2464 | if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
|
---|
2465 | #ifdef LIBXML_SAX1_ENABLED
|
---|
2466 | if ((ctxt->sax != NULL) &&
|
---|
2467 | (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
|
---|
2468 | #else
|
---|
2469 | if (ctxt->sax != NULL)
|
---|
2470 | #endif /* LIBXML_SAX1_ENABLED */
|
---|
2471 | xmlFree(ctxt->sax);
|
---|
2472 | if (ctxt->directory != NULL) xmlFree(ctxt->directory);
|
---|
2473 | if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
|
---|
2474 | if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
|
---|
2475 | if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
|
---|
2476 | if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
|
---|
2477 | if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
|
---|
2478 | if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
|
---|
2479 | if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
|
---|
2480 | if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
|
---|
2481 | if (ctxt->attsDefault != NULL)
|
---|
2482 | xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
|
---|
2483 | if (ctxt->attsSpecial != NULL)
|
---|
2484 | xmlHashFree(ctxt->attsSpecial, NULL);
|
---|
2485 | if (ctxt->freeElems != NULL) {
|
---|
2486 | xmlNodePtr cur, next;
|
---|
2487 |
|
---|
2488 | cur = ctxt->freeElems;
|
---|
2489 | while (cur != NULL) {
|
---|
2490 | next = cur->next;
|
---|
2491 | xmlFree(cur);
|
---|
2492 | cur = next;
|
---|
2493 | }
|
---|
2494 | }
|
---|
2495 | if (ctxt->freeAttrs != NULL) {
|
---|
2496 | xmlAttrPtr cur, next;
|
---|
2497 |
|
---|
2498 | cur = ctxt->freeAttrs;
|
---|
2499 | while (cur != NULL) {
|
---|
2500 | next = cur->next;
|
---|
2501 | xmlFree(cur);
|
---|
2502 | cur = next;
|
---|
2503 | }
|
---|
2504 | }
|
---|
2505 | /*
|
---|
2506 | * cleanup the error strings
|
---|
2507 | */
|
---|
2508 | if (ctxt->lastError.message != NULL)
|
---|
2509 | xmlFree(ctxt->lastError.message);
|
---|
2510 | if (ctxt->lastError.file != NULL)
|
---|
2511 | xmlFree(ctxt->lastError.file);
|
---|
2512 | if (ctxt->lastError.str1 != NULL)
|
---|
2513 | xmlFree(ctxt->lastError.str1);
|
---|
2514 | if (ctxt->lastError.str2 != NULL)
|
---|
2515 | xmlFree(ctxt->lastError.str2);
|
---|
2516 | if (ctxt->lastError.str3 != NULL)
|
---|
2517 | xmlFree(ctxt->lastError.str3);
|
---|
2518 |
|
---|
2519 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
2520 | if (ctxt->catalogs != NULL)
|
---|
2521 | xmlCatalogFreeLocal(ctxt->catalogs);
|
---|
2522 | #endif
|
---|
2523 | xmlFree(ctxt);
|
---|
2524 | }
|
---|
2525 |
|
---|
2526 | /**
|
---|
2527 | * xmlNewParserCtxt:
|
---|
2528 | *
|
---|
2529 | * Allocate and initialize a new parser context.
|
---|
2530 | *
|
---|
2531 | * Returns the xmlParserCtxtPtr or NULL
|
---|
2532 | */
|
---|
2533 |
|
---|
2534 | xmlParserCtxtPtr
|
---|
2535 | xmlNewParserCtxt(void)
|
---|
2536 | {
|
---|
2537 | return(xmlNewSAXParserCtxt(NULL, NULL));
|
---|
2538 | }
|
---|
2539 |
|
---|
2540 | /**
|
---|
2541 | * xmlNewSAXParserCtxt:
|
---|
2542 | * @sax: SAX handler
|
---|
2543 | * @userData: user data
|
---|
2544 | *
|
---|
2545 | * Allocate and initialize a new SAX parser context. If userData is NULL,
|
---|
2546 | * the parser context will be passed as user data.
|
---|
2547 | *
|
---|
2548 | * Available since 2.11.0. If you want support older versions,
|
---|
2549 | * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
|
---|
2550 | * struct assignment.
|
---|
2551 | *
|
---|
2552 | * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
|
---|
2553 | */
|
---|
2554 |
|
---|
2555 | xmlParserCtxtPtr
|
---|
2556 | xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
|
---|
2557 | {
|
---|
2558 | xmlParserCtxtPtr ctxt;
|
---|
2559 |
|
---|
2560 | xmlInitParser();
|
---|
2561 |
|
---|
2562 | ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
|
---|
2563 | if (ctxt == NULL)
|
---|
2564 | return(NULL);
|
---|
2565 | memset(ctxt, 0, sizeof(xmlParserCtxt));
|
---|
2566 | if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
|
---|
2567 | xmlFreeParserCtxt(ctxt);
|
---|
2568 | return(NULL);
|
---|
2569 | }
|
---|
2570 | return(ctxt);
|
---|
2571 | }
|
---|
2572 |
|
---|
2573 | /************************************************************************
|
---|
2574 | * *
|
---|
2575 | * Handling of node information *
|
---|
2576 | * *
|
---|
2577 | ************************************************************************/
|
---|
2578 |
|
---|
2579 | /**
|
---|
2580 | * xmlClearParserCtxt:
|
---|
2581 | * @ctxt: an XML parser context
|
---|
2582 | *
|
---|
2583 | * Clear (release owned resources) and reinitialize a parser context
|
---|
2584 | */
|
---|
2585 |
|
---|
2586 | void
|
---|
2587 | xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
|
---|
2588 | {
|
---|
2589 | if (ctxt==NULL)
|
---|
2590 | return;
|
---|
2591 | xmlClearNodeInfoSeq(&ctxt->node_seq);
|
---|
2592 | xmlCtxtReset(ctxt);
|
---|
2593 | }
|
---|
2594 |
|
---|
2595 |
|
---|
2596 | /**
|
---|
2597 | * xmlParserFindNodeInfo:
|
---|
2598 | * @ctx: an XML parser context
|
---|
2599 | * @node: an XML node within the tree
|
---|
2600 | *
|
---|
2601 | * DEPRECATED: Don't use.
|
---|
2602 | *
|
---|
2603 | * Find the parser node info struct for a given node
|
---|
2604 | *
|
---|
2605 | * Returns an xmlParserNodeInfo block pointer or NULL
|
---|
2606 | */
|
---|
2607 | const xmlParserNodeInfo *
|
---|
2608 | xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
|
---|
2609 | {
|
---|
2610 | unsigned long pos;
|
---|
2611 |
|
---|
2612 | if ((ctx == NULL) || (node == NULL))
|
---|
2613 | return (NULL);
|
---|
2614 | /* Find position where node should be at */
|
---|
2615 | pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
|
---|
2616 | if (pos < ctx->node_seq.length
|
---|
2617 | && ctx->node_seq.buffer[pos].node == node)
|
---|
2618 | return &ctx->node_seq.buffer[pos];
|
---|
2619 | else
|
---|
2620 | return NULL;
|
---|
2621 | }
|
---|
2622 |
|
---|
2623 |
|
---|
2624 | /**
|
---|
2625 | * xmlInitNodeInfoSeq:
|
---|
2626 | * @seq: a node info sequence pointer
|
---|
2627 | *
|
---|
2628 | * DEPRECATED: Don't use.
|
---|
2629 | *
|
---|
2630 | * -- Initialize (set to initial state) node info sequence
|
---|
2631 | */
|
---|
2632 | void
|
---|
2633 | xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
|
---|
2634 | {
|
---|
2635 | if (seq == NULL)
|
---|
2636 | return;
|
---|
2637 | seq->length = 0;
|
---|
2638 | seq->maximum = 0;
|
---|
2639 | seq->buffer = NULL;
|
---|
2640 | }
|
---|
2641 |
|
---|
2642 | /**
|
---|
2643 | * xmlClearNodeInfoSeq:
|
---|
2644 | * @seq: a node info sequence pointer
|
---|
2645 | *
|
---|
2646 | * DEPRECATED: Don't use.
|
---|
2647 | *
|
---|
2648 | * -- Clear (release memory and reinitialize) node
|
---|
2649 | * info sequence
|
---|
2650 | */
|
---|
2651 | void
|
---|
2652 | xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
|
---|
2653 | {
|
---|
2654 | if (seq == NULL)
|
---|
2655 | return;
|
---|
2656 | if (seq->buffer != NULL)
|
---|
2657 | xmlFree(seq->buffer);
|
---|
2658 | xmlInitNodeInfoSeq(seq);
|
---|
2659 | }
|
---|
2660 |
|
---|
2661 | /**
|
---|
2662 | * xmlParserFindNodeInfoIndex:
|
---|
2663 | * @seq: a node info sequence pointer
|
---|
2664 | * @node: an XML node pointer
|
---|
2665 | *
|
---|
2666 | * DEPRECATED: Don't use.
|
---|
2667 | *
|
---|
2668 | * xmlParserFindNodeInfoIndex : Find the index that the info record for
|
---|
2669 | * the given node is or should be at in a sorted sequence
|
---|
2670 | *
|
---|
2671 | * Returns a long indicating the position of the record
|
---|
2672 | */
|
---|
2673 | unsigned long
|
---|
2674 | xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
|
---|
2675 | xmlNodePtr node)
|
---|
2676 | {
|
---|
2677 | unsigned long upper, lower, middle;
|
---|
2678 | int found = 0;
|
---|
2679 |
|
---|
2680 | if ((seq == NULL) || (node == NULL))
|
---|
2681 | return ((unsigned long) -1);
|
---|
2682 |
|
---|
2683 | /* Do a binary search for the key */
|
---|
2684 | lower = 1;
|
---|
2685 | upper = seq->length;
|
---|
2686 | middle = 0;
|
---|
2687 | while (lower <= upper && !found) {
|
---|
2688 | middle = lower + (upper - lower) / 2;
|
---|
2689 | if (node == seq->buffer[middle - 1].node)
|
---|
2690 | found = 1;
|
---|
2691 | else if (node < seq->buffer[middle - 1].node)
|
---|
2692 | upper = middle - 1;
|
---|
2693 | else
|
---|
2694 | lower = middle + 1;
|
---|
2695 | }
|
---|
2696 |
|
---|
2697 | /* Return position */
|
---|
2698 | if (middle == 0 || seq->buffer[middle - 1].node < node)
|
---|
2699 | return middle;
|
---|
2700 | else
|
---|
2701 | return middle - 1;
|
---|
2702 | }
|
---|
2703 |
|
---|
2704 |
|
---|
2705 | /**
|
---|
2706 | * xmlParserAddNodeInfo:
|
---|
2707 | * @ctxt: an XML parser context
|
---|
2708 | * @info: a node info sequence pointer
|
---|
2709 | *
|
---|
2710 | * DEPRECATED: Don't use.
|
---|
2711 | *
|
---|
2712 | * Insert node info record into the sorted sequence
|
---|
2713 | */
|
---|
2714 | void
|
---|
2715 | xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
|
---|
2716 | xmlParserNodeInfoPtr info)
|
---|
2717 | {
|
---|
2718 | unsigned long pos;
|
---|
2719 |
|
---|
2720 | if ((ctxt == NULL) || (info == NULL)) return;
|
---|
2721 |
|
---|
2722 | /* Find pos and check to see if node is already in the sequence */
|
---|
2723 | pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
|
---|
2724 | info->node);
|
---|
2725 |
|
---|
2726 | if ((pos < ctxt->node_seq.length) &&
|
---|
2727 | (ctxt->node_seq.buffer != NULL) &&
|
---|
2728 | (ctxt->node_seq.buffer[pos].node == info->node)) {
|
---|
2729 | ctxt->node_seq.buffer[pos] = *info;
|
---|
2730 | }
|
---|
2731 |
|
---|
2732 | /* Otherwise, we need to add new node to buffer */
|
---|
2733 | else {
|
---|
2734 | if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
|
---|
2735 | (ctxt->node_seq.buffer == NULL)) {
|
---|
2736 | xmlParserNodeInfo *tmp_buffer;
|
---|
2737 | unsigned int byte_size;
|
---|
2738 |
|
---|
2739 | if (ctxt->node_seq.maximum == 0)
|
---|
2740 | ctxt->node_seq.maximum = 2;
|
---|
2741 | byte_size = (sizeof(*ctxt->node_seq.buffer) *
|
---|
2742 | (2 * ctxt->node_seq.maximum));
|
---|
2743 |
|
---|
2744 | if (ctxt->node_seq.buffer == NULL)
|
---|
2745 | tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
|
---|
2746 | else
|
---|
2747 | tmp_buffer =
|
---|
2748 | (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
|
---|
2749 | byte_size);
|
---|
2750 |
|
---|
2751 | if (tmp_buffer == NULL) {
|
---|
2752 | xmlCtxtErrMemory(ctxt);
|
---|
2753 | return;
|
---|
2754 | }
|
---|
2755 | ctxt->node_seq.buffer = tmp_buffer;
|
---|
2756 | ctxt->node_seq.maximum *= 2;
|
---|
2757 | }
|
---|
2758 |
|
---|
2759 | /* If position is not at end, move elements out of the way */
|
---|
2760 | if (pos != ctxt->node_seq.length) {
|
---|
2761 | unsigned long i;
|
---|
2762 |
|
---|
2763 | for (i = ctxt->node_seq.length; i > pos; i--)
|
---|
2764 | ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
|
---|
2765 | }
|
---|
2766 |
|
---|
2767 | /* Copy element and increase length */
|
---|
2768 | ctxt->node_seq.buffer[pos] = *info;
|
---|
2769 | ctxt->node_seq.length++;
|
---|
2770 | }
|
---|
2771 | }
|
---|
2772 |
|
---|
2773 | /************************************************************************
|
---|
2774 | * *
|
---|
2775 | * Defaults settings *
|
---|
2776 | * *
|
---|
2777 | ************************************************************************/
|
---|
2778 | /**
|
---|
2779 | * xmlPedanticParserDefault:
|
---|
2780 | * @val: int 0 or 1
|
---|
2781 | *
|
---|
2782 | * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
|
---|
2783 | *
|
---|
2784 | * Set and return the previous value for enabling pedantic warnings.
|
---|
2785 | *
|
---|
2786 | * Returns the last value for 0 for no substitution, 1 for substitution.
|
---|
2787 | */
|
---|
2788 |
|
---|
2789 | int
|
---|
2790 | xmlPedanticParserDefault(int val) {
|
---|
2791 | int old = xmlPedanticParserDefaultValue;
|
---|
2792 |
|
---|
2793 | xmlPedanticParserDefaultValue = val;
|
---|
2794 | return(old);
|
---|
2795 | }
|
---|
2796 |
|
---|
2797 | /**
|
---|
2798 | * xmlLineNumbersDefault:
|
---|
2799 | * @val: int 0 or 1
|
---|
2800 | *
|
---|
2801 | * DEPRECATED: The modern options API always enables line numbers.
|
---|
2802 | *
|
---|
2803 | * Set and return the previous value for enabling line numbers in elements
|
---|
2804 | * contents. This may break on old application and is turned off by default.
|
---|
2805 | *
|
---|
2806 | * Returns the last value for 0 for no substitution, 1 for substitution.
|
---|
2807 | */
|
---|
2808 |
|
---|
2809 | int
|
---|
2810 | xmlLineNumbersDefault(int val) {
|
---|
2811 | int old = xmlLineNumbersDefaultValue;
|
---|
2812 |
|
---|
2813 | xmlLineNumbersDefaultValue = val;
|
---|
2814 | return(old);
|
---|
2815 | }
|
---|
2816 |
|
---|
2817 | /**
|
---|
2818 | * xmlSubstituteEntitiesDefault:
|
---|
2819 | * @val: int 0 or 1
|
---|
2820 | *
|
---|
2821 | * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
|
---|
2822 | *
|
---|
2823 | * Set and return the previous value for default entity support.
|
---|
2824 | * Initially the parser always keep entity references instead of substituting
|
---|
2825 | * entity values in the output. This function has to be used to change the
|
---|
2826 | * default parser behavior
|
---|
2827 | * SAX::substituteEntities() has to be used for changing that on a file by
|
---|
2828 | * file basis.
|
---|
2829 | *
|
---|
2830 | * Returns the last value for 0 for no substitution, 1 for substitution.
|
---|
2831 | */
|
---|
2832 |
|
---|
2833 | int
|
---|
2834 | xmlSubstituteEntitiesDefault(int val) {
|
---|
2835 | int old = xmlSubstituteEntitiesDefaultValue;
|
---|
2836 |
|
---|
2837 | xmlSubstituteEntitiesDefaultValue = val;
|
---|
2838 | return(old);
|
---|
2839 | }
|
---|
2840 |
|
---|
2841 | /**
|
---|
2842 | * xmlKeepBlanksDefault:
|
---|
2843 | * @val: int 0 or 1
|
---|
2844 | *
|
---|
2845 | * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
|
---|
2846 | *
|
---|
2847 | * Set and return the previous value for default blanks text nodes support.
|
---|
2848 | * The 1.x version of the parser used an heuristic to try to detect
|
---|
2849 | * ignorable white spaces. As a result the SAX callback was generating
|
---|
2850 | * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
|
---|
2851 | * using the DOM output text nodes containing those blanks were not generated.
|
---|
2852 | * The 2.x and later version will switch to the XML standard way and
|
---|
2853 | * ignorableWhitespace() are only generated when running the parser in
|
---|
2854 | * validating mode and when the current element doesn't allow CDATA or
|
---|
2855 | * mixed content.
|
---|
2856 | * This function is provided as a way to force the standard behavior
|
---|
2857 | * on 1.X libs and to switch back to the old mode for compatibility when
|
---|
2858 | * running 1.X client code on 2.X . Upgrade of 1.X code should be done
|
---|
2859 | * by using xmlIsBlankNode() commodity function to detect the "empty"
|
---|
2860 | * nodes generated.
|
---|
2861 | * This value also affect autogeneration of indentation when saving code
|
---|
2862 | * if blanks sections are kept, indentation is not generated.
|
---|
2863 | *
|
---|
2864 | * Returns the last value for 0 for no substitution, 1 for substitution.
|
---|
2865 | */
|
---|
2866 |
|
---|
2867 | int
|
---|
2868 | xmlKeepBlanksDefault(int val) {
|
---|
2869 | int old = xmlKeepBlanksDefaultValue;
|
---|
2870 |
|
---|
2871 | xmlKeepBlanksDefaultValue = val;
|
---|
2872 | #ifdef LIBXML_OUTPUT_ENABLED
|
---|
2873 | if (!val)
|
---|
2874 | xmlIndentTreeOutput = 1;
|
---|
2875 | #endif
|
---|
2876 | return(old);
|
---|
2877 | }
|
---|
2878 |
|
---|